#define DEBUG_TYPE "amdgpu-disassembler"
#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())

  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                AMDGPU::OpName Name) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
  if (DAsm->isGFX12Plus()) {

  } else if (DAsm->isVI()) {
  return addOperand(Inst, DAsm->decodeBoolReg(Val));

  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));

  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
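// Illustration (not part of the file itself): an instantiation such as
// DECODE_SDWA(Src32) — via DECODE_OPERAND, see DECODE_SDWA further below —
// expands to a static trampoline that recovers the AMDGPUDisassembler from
// the generic MCDisassembler and forwards the raw encoded field, roughly:
//
//   static DecodeStatus decodeSDWASrc32(MCInst &Inst, unsigned Imm,
//                                       uint64_t /*Addr*/,
//                                       const MCDisassembler *Decoder) {
//     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
//     return addOperand(Inst, DAsm->decodeSDWASrc32(Imm));
//   }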
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t,                                    \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t,               \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm));               \
  }
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                unsigned OpWidth, unsigned Imm, unsigned EncImm,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm));
}
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
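// Illustration: DECODE_OPERAND_SREG_7(SReg_32, 32) pastes tokens through
// DECODE_SrcOp above, producing a decoder that bounds-checks the 7-bit field
// and hands it to decodeSrcOp():
//
//   static DecodeStatus DecodeSReg_32RegisterClass(
//       MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder) {
//     assert(Imm < (1 << 7) && "7-bit encoding");
//     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
//     return addOperand(Inst, DAsm->decodeSrcOp(32, Imm));
//   }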
template <unsigned OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t,
                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | 512, Decoder);
}

template <unsigned OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

template <unsigned OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}

template <unsigned OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
}

template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm, uint64_t,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm, uint64_t,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}
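// Usage sketch (illustrative; RawField is a hypothetical value): the
// generated .inc tables reference explicit instantiations of these templates,
// e.g. a 64-bit-wide source operand field is routed through
// decodeSrcRegOrImm9<64>:
//
//   // given MCInst Inst and const MCDisassembler *Decoder
//   unsigned RawField = 0x80; // hypothetical 9-bit src encoding
//   decodeSrcRegOrImm9<64>(Inst, RawField, /*Addr=*/0, Decoder);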
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;

  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;

  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t,
                                                const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & (1 << 8)) { // VGPR half-register
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));
}
template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, uint64_t,
                                          const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & (1 << 8)) { // VGPR half-register
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));
}
  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;

  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));

  return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));

template <unsigned Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, uint64_t,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(64, Imm));
}
#define DECODE_SDWA(DecName) \
  DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
#include "AMDGPUGenDisassemblerTables.inc"
template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
template <typename InsnType>
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
                                               InsnType Inst, uint64_t Address,
                                               raw_ostream &Comments) const {
  const auto SavedBytes = Bytes;

  decodeInstruction(Table, TmpInst, Inst, Address, this, STI);

  Comments << LocalComments;
template <typename InsnType>
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1,
                                               const uint8_t *Table2,
                                               MCInst &MI, InsnType Inst,
                                               uint64_t Address,
                                               raw_ostream &Comments) const {
  for (const uint8_t *T : {Table1, Table2}) {
template <typename T> static T eatBytes(ArrayRef<uint8_t> &Bytes) {
  T Res = support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}
static std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  std::bitset<96> Lo(support::endian::read64le(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<96> Hi(support::endian::read32le(Bytes.data()));
  Bytes = Bytes.slice(4);
  return (Hi << 64) | Lo;
}
static std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
  std::bitset<128> Lo(support::endian::read64le(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<128> Hi(support::endian::read64le(Bytes.data()));
  Bytes = Bytes.slice(8);
  return (Hi << 64) | Lo;
}
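// Usage sketch (illustrative): each byte-eating helper returns the decoded
// value and advances the shared ArrayRef in place, so successive calls walk
// the raw instruction stream:
//
//   ArrayRef<uint8_t> Stream = Bytes_;            // raw instruction bytes
//   uint32_t DWord = eatBytes<uint32_t>(Stream);  // consumes 4 bytes
//   std::bitset<96> Wide = eat12Bytes(Stream);    // consumes 12 more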
void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
                                           const MCInstrInfo &MCII) const {
  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
    if (OpNo >= MI.getNumOperands())
      continue;

    MCOperand &Op = MI.getOperand(OpNo);

    int64_t Imm = Op.getImm();

    switch (OpDesc.OperandType) {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  Size = std::min((size_t)4, Bytes_.size());

  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {

    Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);

    Bytes = Bytes_.slice(0, MaxInstBytesNum);
  } else if (Bytes.size() >= 16 &&
             STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {

    Bytes = Bytes_.slice(0, MaxInstBytesNum);
  if (Bytes.size() >= 8) {

    if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&

    if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&

    if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&

    if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&

    if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&

    if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&

    Bytes = Bytes_.slice(0, MaxInstBytesNum);
  if (Bytes.size() >= 4) {

    if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&

    if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&

    if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&

  decodeImmOperands(MI, *MCII);
  else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=

                         AMDGPU::OpName::src2_modifiers);

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {

                         AMDGPU::OpName::src2_modifiers);
  if (MCII->get(MI.getOpcode()).TSFlags &

    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);

      if (MI.getNumOperands() <= (unsigned)CPolPos) {

                             AMDGPU::OpName::cpol);

        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
  if ((MCII->get(MI.getOpcode()).TSFlags &

      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {

        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto *TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
  if (OffsetIdx != -1) {
    uint32_t Imm = MI.getOperand(OffsetIdx).getImm();

    if (SignedOffset < 0)
  if (MCII->get(MI.getOpcode()).TSFlags &

        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto *SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);

        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;

      Bytes = Bytes.slice(4 * NSAWords);
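      // Worked example: an NSA image instruction with 5 extra address
      // operands has NSAArgs = 5, so NSAWords = (5 + 3) / 4 = 2 and the
      // decoder consumes 2 extra dwords (8 bytes) of NSA encoding above.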
  if (MCII->get(MI.getOpcode()).TSFlags &
  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                                                              MCOI::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() !=
                           MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));

                           AMDGPU::OpName::vdst_in);
      MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
    auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
    if (Bytes_[0] != ExecEncoding)

  Size = MaxInstBytesNum - Bytes.size();
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {

  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);

                         AMDGPU::OpName::sdst);
    return MO.setReg(
        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));

        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));

        MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {

      BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);

  unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
  unsigned BLGP = MI.getOperand(BlgpIdx).getImm();

  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);

  unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
  unsigned FmtB = MI.getOperand(FmtBIdx).getImm();

  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {

  unsigned Opc = MI.getOpcode();
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    unsigned Val = MI.getOperand(OpIdx).getImm();

    } else if (J == 0) {
  const unsigned Opc = MI.getOpcode();

      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
      OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {

    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)

    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(

          AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),

  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());

                       AMDGPU::OpName::src2_modifiers);
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src1_modifiers);
  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);
static unsigned CheckVGPROverflow(unsigned Reg, const MCRegisterClass &RC,
                                  const MCRegisterInfo &MRI) {

  if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::VGPR0;
  else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::AGPR0;

  assert(BaseReg && "Only vector registers expected");

  return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : AMDGPU::NoRegister;
}
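// Worked example: a 4-dword register tuple whose first subregister is v254
// gives Sub0 - BaseReg = 254 and NumRegs = 4; since 254 + 4 > 256 the tuple
// would spill past the architectural VGPR file, so NoRegister is returned.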
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);

  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);
  if (BaseOpcode->BVH) {

  bool IsAtomic = (VDstIdx != -1);

  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);

  const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsVSample && AddrSize > 12)

    if (AddrSize > Info->VAddrDwords) {
      if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {

      IsPartialNSA = true;

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI))
    DstSize = (DstSize + 1) / 2;

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  if (NewOpcode == -1)
    return;

  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;

  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {

  assert(AddrSize <= Info->VAddrDwords);
  MI.erase(MI.begin() + VAddr0Idx + AddrSize,
           MI.begin() + VAddr0Idx + Info->VAddrDwords);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::neg_hi);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src1_modifiers);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);
  assert(HasLiteral && "Should have decoded a literal");
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine &ErrMsg) const {

MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto &RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                               ": unknown register " + Twine(Val));
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  unsigned shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1529 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1531 return errOperand(Val,
"More than one unique literal is illegal");
1541 if (Literal64 != Val)
1542 return errOperand(Val,
"More than one unique literal is illegal");
1545 Literal = Literal64 = Val;
  if (Bytes.size() < 4) {
    return errOperand(0, "cannot read literal, inst bytes left " +
                             Twine(Bytes.size()));
  }

  assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));

  if (Bytes.size() < 8) {
    return errOperand(0, "cannot read literal64, inst bytes left " +
                             Twine(Bytes.size()));
  }
static MCOperand decodeIntImmed(unsigned Imm) {
  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX)
                                  ? (static_cast<int64_t>(Imm) -
                                     INLINE_INTEGER_C_MIN)
                                  : (INLINE_INTEGER_C_POSITIVE_MAX -
                                     static_cast<int64_t>(Imm)));
}
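// Worked example: with the usual inline-integer encoding values
// (INLINE_INTEGER_C_MIN = 128, INLINE_INTEGER_C_POSITIVE_MAX = 192), an
// encoded operand of 130 decodes to 130 - 128 = 2, while 193 decodes to
// 192 - 193 = -1.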
    return 0x3fc45f306dc9c882; // IEEE-754 double bit pattern of 1/(2*pi)
  case 32:
    return VGPR_32RegClassID;
  case 64:
    return VReg_64RegClassID;
  case 96:
    return VReg_96RegClassID;
  case 128:
    return VReg_128RegClassID;
  case 160:
    return VReg_160RegClassID;
  case 192:
    return VReg_192RegClassID;
  case 256:
    return VReg_256RegClassID;
  case 288:
    return VReg_288RegClassID;
  case 320:
    return VReg_320RegClassID;
  case 352:
    return VReg_352RegClassID;
  case 384:
    return VReg_384RegClassID;
  case 512:
    return VReg_512RegClassID;
  case 1024:
    return VReg_1024RegClassID;
  case 32:
    return AGPR_32RegClassID;
  case 64:
    return AReg_64RegClassID;
  case 96:
    return AReg_96RegClassID;
  case 128:
    return AReg_128RegClassID;
  case 160:
    return AReg_160RegClassID;
  case 256:
    return AReg_256RegClassID;
  case 288:
    return AReg_288RegClassID;
  case 320:
    return AReg_320RegClassID;
  case 352:
    return AReg_352RegClassID;
  case 384:
    return AReg_384RegClassID;
  case 512:
    return AReg_512RegClassID;
  case 1024:
    return AReg_1024RegClassID;
  case 32:
    return SGPR_32RegClassID;
  case 64:
    return SGPR_64RegClassID;
  case 96:
    return SGPR_96RegClassID;
  case 128:
    return SGPR_128RegClassID;
  case 160:
    return SGPR_160RegClassID;
  case 256:
    return SGPR_256RegClassID;
  case 288:
    return SGPR_288RegClassID;
  case 320:
    return SGPR_320RegClassID;
  case 352:
    return SGPR_352RegClassID;
  case 384:
    return SGPR_384RegClassID;
  case 512:
    return SGPR_512RegClassID;
  case 32:
    return TTMP_32RegClassID;
  case 64:
    return TTMP_64RegClassID;
  case 128:
    return TTMP_128RegClassID;
  case 256:
    return TTMP_256RegClassID;
  case 288:
    return TTMP_288RegClassID;
  case 320:
    return TTMP_320RegClassID;
  case 352:
    return TTMP_352RegClassID;
  case 384:
    return TTMP_384RegClassID;
  case 512:
    return TTMP_512RegClassID;
int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
}
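// Worked example: on gfx9+ the trap-temp window starts at encoding 108
// (TTMP_GFX9PLUS_MIN), so Val = 110 maps to ttmp index 110 - 108 = 2, while
// any Val outside [TTmpMin, TTmpMax] yields -1 (not a ttmp register).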
  bool IsAGPR = Val & 512;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {

MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(unsigned Width,
                                                 unsigned Val) const {
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");

  static_assert(SGPR_MIN == 0);

  if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
      (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
      Val == LITERAL_CONST)

  if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
                                               unsigned Val) const {
  int VDstXInd =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);

  unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
  Val |= ~XDstReg & 1;
MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
                                            const unsigned Val) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {

    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
        (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))

  if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
  return STI.hasFeature(AMDGPU::FeatureWavefrontSize32) ? decodeSrcOp(32, Val)
                                                        : decodeSrcOp(64, Val);
  auto [Version, W64, W32, MDP] = Encoding::decode(Imm);

  if (Encoding::encode(Version, W64, W32, MDP) != Imm)

  if (I == Versions.end())
  return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);

  return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);

  return STI.hasFeature(AMDGPU::FeatureGFX11);

  return STI.hasFeature(AMDGPU::FeatureGFX12);

  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
  if (PopCount == 1) {
    S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  } else {
    S << "bits in range ("
      << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
      << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  }
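// Worked example: Mask = 0x6 (PopCount = 2, TrailingZeros = 1) with
// BaseBytes = 4 renders "bits in range (34:33)": the low bit is
// 1 + 4*8 = 33 and the high bit is (1 + 2 - 1) + 4*8 = 34.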
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
  } while (0)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
             << GET_FIELD(MASK) << '\n';                                       \
  } while (0)

#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  do {                                                                         \
    if (FourByteBuffer & (MASK)) {                                             \
      return createStringError(std::errc::invalid_argument,                    \
                               "kernel descriptor " DESC                       \
                               " reserved %s set" MSG,                         \
                               getBitRangeFromMask((MASK), 0).c_str());        \
    }                                                                          \
  } while (0)

#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
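// Illustration (mask name hypothetical): CHECK_RESERVED_BITS_MSG(RESERVED0,
// "must be zero") pastes through CHECK_RESERVED_BITS_IMPL and, if any masked
// bit of FourByteBuffer is set, returns an error along the lines of
//   kernel descriptor RESERVED0 reserved bit (...) set, must be zero
// with the offending bit range rendered by getBitRangeFromMask().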
  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

  uint32_t NextFreeVGPR =
      (GranulatedWorkitemVGPRCount + 1) *
      AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

  if (isGFX10Plus())
    CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                            "must be zero on gfx10+");

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  if (!hasArchitectedFlatScratch())
    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

    PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);

    PRINT_DIRECTIVE(".amdhsa_ieee_mode",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);

    PRINT_DIRECTIVE(".amdhsa_fp16_overflow",
                    COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);

                                 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");

                    COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);

                              "COMPUTE_PGM_RSRC1");

    PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);

                              "COMPUTE_PGM_RSRC1");

    PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
                    COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
    PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);

    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
    KdStream << Indent << ".amdhsa_accum_offset "
             << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
             << '\n';

    PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
      if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
        PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
                        COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
      } else {
        PRINT_PSEUDO_DIRECTIVE_COMMENT(
            "SHARED_VGPR_COUNT",
            COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
      }

                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx12+");
      PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
                                     COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);

      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);

                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10");

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

      PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
                                     COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);

                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10 or gfx11");

      PRINT_PSEUDO_DIRECTIVE_COMMENT("NAMED_BAR_CNT",
                                     COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TCP_SPLIT",
                                     COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "ENABLE_DIDT_THROTTLE",
          COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

      PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);

                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10");
2469 }
else if (FourByteBuffer) {
2471 std::errc::invalid_argument,
2472 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE
#undef GET_FIELD

#undef CHECK_RESERVED_BITS_IMPL
#undef CHECK_RESERVED_BITS
#undef CHECK_RESERVED_BITS_MSG
#undef CHECK_RESERVED_BITS_DESC
#undef CHECK_RESERVED_BITS_DESC_MSG
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
                                       const char *Msg = "") {
  return createStringError(
      std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
      getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
}

static Error createReservedKDBytesError(unsigned BaseInBytes,
                                        unsigned WidthInBytes) {
  return createStringError(
      std::errc::invalid_argument,
      "kernel descriptor reserved bits in range (%u:%u) set",
      (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
}
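// Worked example: createReservedKDBytesError(6, 2) covers the two reserved
// bytes at offset 6 and formats to
//   "kernel descriptor reserved bits in range (63:48) set"
// since (6 + 2) * 8 - 1 = 63 and 6 * 8 = 48.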
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
  } while (0)
  assert(Bytes.size() == 64);

  switch (Cursor.tell()) {
  case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
             << '\n';
    return true;

  case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';
    return true;

    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size "
             << FourByteBuffer << '\n';
    return true;
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)

    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0)
  case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);

  case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);

  case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
  case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
    TwoByteBuffer = DE.getU16(Cursor);

    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
    if (!hasArchitectedFlatScratch())
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)

        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {

          KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,

      PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

    PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
                    KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {

    TwoByteBuffer = DE.getU16(Cursor);
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
                      KERNARG_PRELOAD_SPEC_LENGTH);
    }

    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
                      KERNARG_PRELOAD_SPEC_OFFSET);
    }
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)

#undef PRINT_DIRECTIVE
  if (Bytes.size() != 64 || KdAddress % 64 != 0)
    return createStringError(std::errc::invalid_argument,
                             "kernel descriptor must be 64-byte aligned");
  EnableWavefrontSize32 =
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

  KdStream << ".amdhsa_kernel " << KdName << '\n';

  while (C && C.tell() < Bytes.size()) {

  KdStream << ".end_amdhsa_kernel\n";
2712 "code object v2 is not supported");
const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
                                                           int64_t Val) {
  MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);

  if (!Valid || Res != Val)
    Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
  const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
  if (Result != Symbols->end()) {
    auto *Sym = Ctx.getOrCreateSymbol(Result->Name);

  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
    std::unique_ptr<MCRelocationInfo> &&RelInfo) {