#define DEBUG_TYPE "amdgpu-disassembler"
#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)
AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // Create constant-valued symbols for the known microcode versions up front.
  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
}
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                AMDGPU::OpName Name) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  // SOPP branches take a signed 16-bit word offset relative to the address
  // of the instruction following the branch.
  int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
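// Worked example (illustrative, not from the original file): decoding
// Imm = 0xFFFD (-3) at Addr = 0x100 yields Offset = -3 * 4 + 4 + 0x100 = 0xF8,
// i.e. the target is relative to the address of the *next* instruction, in
// units of 4-byte words.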
  int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
// ...
  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
// ...
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }
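// Illustration (hypothetical instantiation, not part of the original file):
// DECODE_OPERAND_REG_8(VGPR_32) expands to a DecodeVGPR_32RegisterClass
// callback that maps an 8-bit register number straight to an MCOperand in
// AMDGPU::VGPR_32RegClassID.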
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm));               \
  }
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                unsigned OpWidth, unsigned Imm, unsigned EncImm,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm));
}
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
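// Illustration (hypothetical instantiation, not part of the original file):
// DECODE_OPERAND_SREG_7(SReg_32, 32) produces DecodeSReg_32RegisterClass,
// which forwards the 7-bit encoding to decodeSrcOp with OpWidth = 32.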
template <unsigned OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | 256, Decoder);
}
template <unsigned OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}
template <unsigned OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}
template <unsigned OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
}
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /*Addr*/,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /*Addr*/,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}
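// Convention used by the decoders above (inferred from the call sites): the
// 10-bit source encoding carries the AGPR/VGPR "acc" selector in bit 9, so
// the *A9 variants OR in 512 (1 << 9) to route a plain 9-bit AGPR encoding
// through the common 10-bit path of decodeSrcOp.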
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
static DecodeStatus DecodeVGPR_16_Lo128RegisterClass(
    MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
    const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & 256) { // IS_VGPR
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));
}
template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & 256) { // IS_VGPR
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));
}
static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
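// Layout of the true16 encodings above: the low bits index the VGPR, one high
// bit (bit 9, or bit 7 in the Lo128 forms) selects the high or low 16-bit
// half, and the VSrc variants use a further bit to separate VGPRs from SGPRs
// and inline constants (the decodeNonVGPRSrcOp fallback).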
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
// ...
  return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
// ...
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
}

static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (!DAsm->isGFX90A()) {
    // ...
  } else {
    // If an atomic has both vdata and vdst, their register classes are tied;
    // the acc bit is decoded with the first (vdst/data0) operand and has to
    // be propagated to the others.
    unsigned Opc = Inst.getOpcode();
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
                                  ? AMDGPU::OpName::data0
                                  : AMDGPU::OpName::vdata;
    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataName);
    // ...
    int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    // ...
    int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
    // ...
  }
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}
template <unsigned Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

// ... (from decodeOperand_VSrc_f64)
  assert(Imm < (1 << 9) && "9-bit encoding");
  return addOperand(Inst, DAsm->decodeSrcOp(64, Imm));
#define DECODE_SDWA(DecName) \
  DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));

#include "AMDGPUGenDisassemblerTables.inc"
template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
template <typename InsnType>
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
                                               InsnType Inst, uint64_t Address,
                                               raw_ostream &Comments) const {
  // ...
  const auto SavedBytes = Bytes;
  // ...
  DecodeStatus Res =
      decodeInstruction(Table, TmpInst, Inst, Address, this, STI);
  // ...
  Comments << LocalComments;
template <typename InsnType>
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1,
                                               const uint8_t *Table2,
                                               MCInst &MI, InsnType Inst,
                                               uint64_t Address,
                                               raw_ostream &Comments) const {
  for (const uint8_t *T : {Table1, Table2}) {
template <typename T> static T eatBytes(ArrayRef<uint8_t> &Bytes) {
  T Res = support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}
static std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(4);
  return (Hi << 64) | Lo;
}
static std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
  std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  return (Hi << 64) | Lo;
}
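// Note: both halves are read little-endian, so for input bytes b0..b15 the
// 128-bit value is (b15..b8 << 64) | (b7..b0); eat12Bytes does the same with
// a 32-bit high half.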
void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
                                           const MCInstrInfo &MCII) const {
  // ...
    if (OpNo >= MI.getNumOperands())
      break;
    // ...
    int64_t Imm = Op.getImm();
    // ...
    switch (OpDesc.OperandType) {
  unsigned MaxInstBytesNum =
      std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);
  // ...
    Size = std::min((size_t)4, Bytes_.size());
  // ...
  Bytes = Bytes_.slice(0, MaxInstBytesNum);
  // ...
      Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);
  // ...
    Bytes = Bytes_.slice(0, MaxInstBytesNum);
  } else if (Bytes.size() >= 16 &&
  // ...
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

  if (Bytes.size() >= 8) {
    const uint64_t QW = eatBytes<uint64_t>(Bytes);
    // ...
  }

  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  if (Bytes.size() >= 4) {
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
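// The repeated "Bytes = Bytes_.slice(0, MaxInstBytesNum)" resets above
// implement the decoder's trial strategy: wider candidate encodings are
// attempted first, and the byte window is rewound whenever one attempt fails
// before the next table is tried.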
  decodeImmOperands(MI, *MCII);
  // ...
  } else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
             -1) {
    // ...
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  // ...
  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    // ...
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto *TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
    // ...
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto *SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
        MI.insert(MI.begin() + VAddrIdx,
                  createRegOperand(VAddrRCID, Bytes[i]));
      }
      Bytes = Bytes.slice(4 * NSAWords);
    }
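// NSA (non-sequential addressing) image instructions append their extra VGPR
// addresses as trailing bytes, four per dword; the loop above folds each byte
// back into a register operand and then skips those words.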
  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode())
                   .getOperandConstraint(VDstIn_Idx, MCOI::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() !=
                           MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
                           MCOperand::createReg(MI.getOperand(Tied).getReg()),
                           AMDGPU::OpName::vdst_in);
    }
  }
    if (Bytes_[0] != ExecEncoding)
      // ...

  Size = MaxInstBytesNum - Bytes.size();
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst.
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    }
  switch (NumRegs) {
  case 4:
    return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
  case 6:
    return MO.setReg(
        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
  case 8:
    if (MCRegister NewReg = MRI.getSubReg(
            MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
      MO.setReg(NewReg);
    }
    return;
  case 12: {
    // There is no 384-bit subreg index, so find the 384-bit super-register
    // that starts at the same first 32-bit sub-register.
    MCRegister BaseReg = MRI.getSubReg(MO.getReg(), AMDGPU::sub0);
    MCRegister NewReg = MRI.getMatchingSuperReg(
        BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
    return MO.setReg(NewReg);
  }
  int BlgpIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
  // ...
  int CbszIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);

  unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
  unsigned BLGP = MI.getOperand(BlgpIdx).getImm();
  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  int FmtAIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
  // ...
  int FmtBIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);

  unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
  unsigned FmtB = MI.getOperand(FmtBIdx).getImm();
  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  unsigned OpSelHi = 0;
  // ...

static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();
    // ...
    } else if (J == 0) {
  const unsigned Opc = MI.getOpcode();
  // ...
  constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
      OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    // ...
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == DST_IDX) {
    // ...
        AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
    // ...
  }

// ... (from convertMacDPPInst)
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
  unsigned Opc = MI.getOpcode();
  // ...
  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
  // ...
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
  // ...
  unsigned Opc = MI.getOpcode();
  // ...
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);

  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);
  if (BaseOpcode->BVH) {
    // ...
  }

  bool IsAtomic = (VDstIdx != -1);
  // ...
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;
  // ...
    int DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    // ...
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
    // ...
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsVSample && AddrSize > 12)
      // ...
    if (AddrSize > Info->VAddrDwords) {
      // ...
      IsPartialNSA = true;
    }
  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI))
    DstSize = (DstSize + 1) / 2;

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding,
                                        DstSize, AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the vdata register to the number of enabled channels.
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;
    // ...
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  // Widen vaddr0 for partial NSA, or when the NSA encoding is unused.
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    // ...
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
    // ...
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    // ...
  }
  // ...
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
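// Net effect of convertMIMGInst: the opcode is retargeted to the variant
// whose vdata/vaddr dword counts match what dmask, d16, tfe and the NSA
// layout actually require, and surplus vaddr operands are erased.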
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, /*IsVOP3P=*/true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  // ...
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  // ...
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::op_sel);
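// All of the convert*DPPInst helpers above use the same idiom: while the
// MCInst has fewer operands than its MCInstrDesc declares, insert
// default-valued operands (modifiers, op_sel, ...) that the compact DPP
// encodings do not carry explicitly.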
  assert(HasLiteral && "Should have decoded a literal");
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine &ErrMsg) const {
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto &RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                               ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ...
  unsigned shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ...
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ...
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
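// Each 32-bit VGPR covers two entries of the VGPR_16 class: index 2*n is the
// low half of VGPRn and 2*n+1 its high half, which is exactly what the
// formula above computes.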
1537 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1539 return errOperand(Val,
"More than one unique literal is illegal");
1549 if (Literal64 != Val)
1550 return errOperand(Val,
"More than one unique literal is illegal");
1553 Literal = Literal64 = Val;
  if (Bytes.size() < 4) {
    return errOperand(0, "cannot read literal, inst bytes left " +
                             Twine(Bytes.size()));
  }
  HasLiteral = true;
  Literal = Literal64 = eatBytes<uint32_t>(Bytes);
  // ...
  if (Bytes.size() < 8) {
    return errOperand(0, "cannot read literal64, inst bytes left " +
                             Twine(Bytes.size()));
  }
  HasLiteral = true;
  Literal64 = eatBytes<uint64_t>(Bytes);
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX)
                                  ? (static_cast<int64_t>(Imm) -
                                     INLINE_INTEGER_C_MIN)
                                  : (INLINE_INTEGER_C_POSITIVE_MAX -
                                     static_cast<int64_t>(Imm)));
}

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240: return llvm::bit_cast<uint32_t>(0.5f);
  case 241: return llvm::bit_cast<uint32_t>(-0.5f);
  case 242: return llvm::bit_cast<uint32_t>(1.0f);
  case 243: return llvm::bit_cast<uint32_t>(-1.0f);
  case 244: return llvm::bit_cast<uint32_t>(2.0f);
  case 245: return llvm::bit_cast<uint32_t>(-2.0f);
  case 246: return llvm::bit_cast<uint32_t>(4.0f);
  case 247: return llvm::bit_cast<uint32_t>(-4.0f);
  case 248: return 0x3e22f983; // 1/(2*pi)
  default: llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240: return llvm::bit_cast<uint64_t>(0.5);
  case 241: return llvm::bit_cast<uint64_t>(-0.5);
  case 242: return llvm::bit_cast<uint64_t>(1.0);
  case 243: return llvm::bit_cast<uint64_t>(-1.0);
  case 244: return llvm::bit_cast<uint64_t>(2.0);
  case 245: return llvm::bit_cast<uint64_t>(-2.0);
  case 246: return llvm::bit_cast<uint64_t>(4.0);
  case 247: return llvm::bit_cast<uint64_t>(-4.0);
  case 248: return 0x3fc45f306dc9c882; // 1/(2*pi)
  default: llvm_unreachable("invalid fp inline imm");
  }
}
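// The raw constants are the IEEE-754 bit patterns of 1/(2*pi):
// 0x3e22f983 ~ 0.15915494f and 0x3fc45f306dc9c882 ~ 0.15915494309189535; they
// cannot be written as exact float/double literals, hence the hex form.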
unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 32: return VGPR_32RegClassID;
  case 64: return VReg_64RegClassID;
  case 96: return VReg_96RegClassID;
  case 128: return VReg_128RegClassID;
  case 160: return VReg_160RegClassID;
  case 192: return VReg_192RegClassID;
  case 256: return VReg_256RegClassID;
  case 288: return VReg_288RegClassID;
  case 320: return VReg_320RegClassID;
  case 352: return VReg_352RegClassID;
  case 384: return VReg_384RegClassID;
  case 512: return VReg_512RegClassID;
  case 1024: return VReg_1024RegClassID;
  default: llvm_unreachable("Invalid register width!");
  }
}

unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 32: return AGPR_32RegClassID;
  case 64: return AReg_64RegClassID;
  case 96: return AReg_96RegClassID;
  case 128: return AReg_128RegClassID;
  case 160: return AReg_160RegClassID;
  case 256: return AReg_256RegClassID;
  case 288: return AReg_288RegClassID;
  case 320: return AReg_320RegClassID;
  case 352: return AReg_352RegClassID;
  case 384: return AReg_384RegClassID;
  case 512: return AReg_512RegClassID;
  case 1024: return AReg_1024RegClassID;
  default: llvm_unreachable("Invalid register width!");
  }
}

unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 32: return SGPR_32RegClassID;
  case 64: return SGPR_64RegClassID;
  case 96: return SGPR_96RegClassID;
  case 128: return SGPR_128RegClassID;
  case 160: return SGPR_160RegClassID;
  case 256: return SGPR_256RegClassID;
  case 288: return SGPR_288RegClassID;
  case 320: return SGPR_320RegClassID;
  case 352: return SGPR_352RegClassID;
  case 384: return SGPR_384RegClassID;
  case 512: return SGPR_512RegClassID;
  default: llvm_unreachable("Invalid register width!");
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 32: return TTMP_32RegClassID;
  case 64: return TTMP_64RegClassID;
  case 128: return TTMP_128RegClassID;
  case 256: return TTMP_256RegClassID;
  case 288: return TTMP_288RegClassID;
  case 320: return TTMP_320RegClassID;
  case 352: return TTMP_352RegClassID;
  case 384: return TTMP_384RegClassID;
  case 512: return TTMP_512RegClassID;
  default: llvm_unreachable("Invalid register width!");
  }
}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
}
  using namespace AMDGPU::EncValues;
  // ...
  bool IsAGPR = Val & 512;
  // ...
  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(unsigned Width,
                                                 unsigned Val) const {
  // Cases when Val{8} is 1 (vgpr, agpr or true16 vgpr) should have been
  // decoded earlier.
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warnings.
    static_assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }
  // ...
  if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
      (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
      Val == LITERAL_CONST)
    return MCOperand::createImm(Val);

  if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
                                               unsigned Val) const {
  int VDstXInd =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
  // ...
  Val |= ~XDstReg & 1;
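// VOPD requires its X and Y destinations to sit in different register banks,
// so vdstY's low bit is not encoded; it is rebuilt here as the complement of
// vdstX's encoded LSB ("Val |= ~XDstReg & 1").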
  using namespace AMDGPU; // (decodeSpecialReg32)
// ...
  using namespace AMDGPU; // (decodeSpecialReg64)
// ...
  using namespace AMDGPU; // (decodeSpecialReg96Plus)
MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;
  // ...
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
        (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
  using namespace AMDGPU::SDWA;

  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
  auto [Version, W64, W32, MDP] = Encoding::decode(Imm);

  // If the decoded value does not round-trip, reserved bits were set; print
  // the immediate raw.
  if (Encoding::encode(Version, W64, W32, MDP) != Imm)
    return MCOperand::createImm(Imm);
  // ...
  if (I == Versions.end())
bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
}
  if (PopCount == 1) {
    S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  } else {
    S << "bits in range ("
      << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
      << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  }
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
  } while (0)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
             << GET_FIELD(MASK) << '\n';                                       \
  } while (0)

#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  do {                                                                         \
    if (FourByteBuffer & (MASK)) {                                             \
      return createStringError(std::errc::invalid_argument,                    \
                               "kernel descriptor " DESC                       \
                               " reserved %s set" MSG,                         \
                               getBitRangeFromMask((MASK), 0).c_str());        \
    }                                                                          \
  } while (0)

#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
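// These helpers are deliberately macros rather than functions: they expand
// inside the decodeCOMPUTE_PGM_RSRC* members below, where FourByteBuffer,
// KdStream and Indent are locals, and the CHECK_RESERVED_BITS* forms must be
// able to return an Error from the enclosing function.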
  using namespace amdhsa;
  // ...
  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

  uint32_t NextFreeVGPR =
      (GranulatedWorkitemVGPRCount + 1) *
      AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

  if (isGFX10Plus())
    CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                            "must be zero on gfx10+");

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  if (!hasArchitectedFlatScratch())
    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
  // ...
    PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
  // ...
    PRINT_DIRECTIVE(".amdhsa_ieee_mode",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
  // ...
  if (isGFX9Plus())
    PRINT_DIRECTIVE(".amdhsa_fp16_overflow",
                    COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
  // ...
    CHECK_RESERVED_BITS_DESC_MSG(/* mask elided */, "COMPUTE_PGM_RSRC1",
                                 "must be zero pre-gfx9");
  // ...
    PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
                                   COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
  // ...
    CHECK_RESERVED_BITS_DESC(/* mask elided */, "COMPUTE_PGM_RSRC1");
  // ...
    PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
  // ...
    PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
                    COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
  using namespace amdhsa;
  // ...
  if (hasArchitectedFlatScratch())
    PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  else
    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
  // ...
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_invalid_op",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
  using namespace amdhsa;
  // ...
  if (isGFX90A()) {
    KdStream << Indent << ".amdhsa_accum_offset "
             << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
             << '\n';

    PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);

    CHECK_RESERVED_BITS_DESC_MSG(/* mask elided */, "COMPUTE_PGM_RSRC3",
                                 "must be zero on gfx90a");
    CHECK_RESERVED_BITS_DESC_MSG(/* mask elided */, "COMPUTE_PGM_RSRC3",
                                 "must be zero on gfx90a");
  } else if (isGFX10Plus()) {
    if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
      PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
                      COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
    } else {
      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
    }
    // ...
      CHECK_RESERVED_BITS_DESC_MSG(/* mask elided */, "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx12+");
    // ...
      PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
                                     COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
    // ...
      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
    // ...
      CHECK_RESERVED_BITS_DESC_MSG(/* mask elided */, "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10");
    // ...
    CHECK_RESERVED_BITS_DESC_MSG(/* mask elided */, "COMPUTE_PGM_RSRC3",
                                 "must be zero on gfx10+");
    // ...
      PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
                                     COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
    // ...
      CHECK_RESERVED_BITS_DESC_MSG(/* mask elided */, "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10 or gfx11");
    // ...
      PRINT_PSEUDO_DIRECTIVE_COMMENT("NAMED_BAR_CNT",
                                     COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TCP_SPLIT",
                                     COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "ENABLE_DIDT_THROTTLE",
          COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
    // ...
      CHECK_RESERVED_BITS_DESC_MSG(/* mask elided */, "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10+");
    // ...
      PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
    // ...
      CHECK_RESERVED_BITS_DESC_MSG(/* mask elided */, "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10");
  } else if (FourByteBuffer) {
    return createStringError(
        std::errc::invalid_argument,
        "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
  }
#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE
#undef GET_FIELD

#undef CHECK_RESERVED_BITS_IMPL
#undef CHECK_RESERVED_BITS
#undef CHECK_RESERVED_BITS_MSG
#undef CHECK_RESERVED_BITS_DESC
#undef CHECK_RESERVED_BITS_DESC_MSG
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
                                       const char *Msg = "") {
  return createStringError(std::errc::invalid_argument,
                           "kernel descriptor reserved %s set%s%s",
                           getBitRangeFromMask(Mask, BaseBytes).c_str(),
                           *Msg ? ", " : "", Msg);
}
static Error createReservedKDBytesError(unsigned BaseInBytes,
                                        unsigned WidthInBytes) {
  return createStringError(
      std::errc::invalid_argument,
      "kernel descriptor reserved bits in range (%u:%u) set",
      (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
}
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
  } while (0)
  switch (Cursor.tell()) {
  case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
             << '\n';
    return true;

  case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';
    return true;

  case amdhsa::KERNARG_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size "
             << FourByteBuffer << '\n';
    return true;

  case amdhsa::RESERVED0_OFFSET:
    // 4 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)
        return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
    }
    return true;

  case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
    // ...
  case amdhsa::RESERVED1_OFFSET:
    // 20 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0)
        return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
    }
    return true;

  case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);

  case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);

  case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);

    if (!hasArchitectedFlatScratch())
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
    if (!hasArchitectedFlatScratch())
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
    // ...
      PRINT_DIRECTIVE(".amdhsa_uses_cu_stores",
                      KERNEL_CODE_PROPERTY_USES_CU_STORES);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
      // ...

    // ENABLE_WAVEFRONT_SIZE32 must not be set before gfx10.
    if (!isGFX10Plus() &&
        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
      // ...
    }
    // ...
      PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
    // ...
    PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
                    KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
      return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
                                       amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
    }
    return true;

  case amdhsa::KERNARG_PRELOAD_OFFSET:
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
                      KERNARG_PRELOAD_SPEC_LENGTH);
    }
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
                      KERNARG_PRELOAD_SPEC_OFFSET);
    }
    return true;

  case amdhsa::RESERVED3_OFFSET:
    // 4 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)
        return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
    }
    return true;
#undef PRINT_DIRECTIVE
  if (Bytes.size() != 64 || KdAddress % 64 != 0)
    return createStringError(std::errc::invalid_argument,
                             "kernel descriptor must be 64-byte aligned");
  // ...
    EnableWavefrontSize32 =
        AMDHSA_BITS_GET(KernelCodeProperties,
                        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
  // ...
  KdStream << ".amdhsa_kernel " << KdName << '\n';

  DataExtractor::Cursor C(0);
  while (C && C.tell() < Bytes.size()) {
    // ...
  }

  KdStream << ".end_amdhsa_kernel\n";
2723 "code object v2 is not supported");
const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
                                                           int64_t Val) {
  MCContext &Ctx = getContext();
  MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
  if (!Sym->isVariable()) {
    Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
  } else {
    int64_t Res = ~Val;
    bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
    if (!Valid || Res != Val)
      Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
  }
  return MCSymbolRefExpr::create(Sym, Ctx);
}
  if (Result != Symbols->end()) {
    // ...
  }
  // ...
  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
    std::unique_ptr<MCRelocationInfo> &&RelInfo) {