#define DEBUG_TYPE "amdgpu-disassembler"

  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())

    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
                                AMDGPU::OpName Name) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))

  if (DAsm->isGFX12Plus()) {
  } else if (DAsm->isVI()) {

  return addOperand(Inst, DAsm->decodeBoolReg(Val));

  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));

  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm));               \
  }
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                unsigned OpWidth, unsigned Imm, unsigned EncImm,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm));
}
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
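// Illustrative expansion (added comment; the instantiation arguments are
// hypothetical): DECODE_OPERAND_SREG_7(SReg_32, 32) would define
// DecodeSReg_32RegisterClass(), which asserts that Imm fits in 7 bits and
// forwards to decodeSrcOp(/*OpWidth=*/32, Imm).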
template <unsigned OpWidth>

template <unsigned OpWidth>

  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);

template <unsigned OpWidth>

  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);

template <unsigned OpWidth>

  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);

template <unsigned OpWidth>

  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);

template <unsigned OpWidth>

  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
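// Note on the EncImm adjustments above (added comment, inferred from the
// decodeSrcOp handling later in this file, where "bool IsAGPR = Val & 512"
// and "VGPR_MIN <= Val" are tested): in the 9/10-bit source-operand
// encodings, values 256..511 select VGPRs (so VGPR-only decoders pass
// Imm | 256), while bit 9 (value 512) marks AGPRs (so AGPR decoders pass
// Imm | 512).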
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;

  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;

  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
template <unsigned OpWidth>

    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));

  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));

template <unsigned OpWidth>

    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));

  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));

  return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));

                                              uint64_t Addr,
                                              const void *Decoder) {
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;

  if (!DAsm->isGFX90A()) {

    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;

                            ? AMDGPU::OpName::data0
                            : AMDGPU::OpName::vdata;

    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataName);

    int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);

    int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));

template <unsigned Opw>

  assert(Imm < (1 << 9) && "9-bit encoding");

  return addOperand(Inst, DAsm->decodeSrcOp(64, Imm));
#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
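// Illustrative expansion (added comment): DECODE_SDWA(Src32) pastes into
// DECODE_OPERAND(decodeSDWASrc32, decodeSDWASrc32), i.e. a static decoder
// stub that casts Decoder back to AMDGPUDisassembler and returns
// addOperand(Inst, DAsm->decodeSDWASrc32(Imm)).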
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));

#include "AMDGPUGenDisassemblerTables.inc"
template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
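// Compile-time sanity checks (added for illustration, not in the original
// source): InsnBitWidth maps the storage type of a fetched encoding to its
// width in bits, so decoder tables can be selected per encoding size.
static_assert(InsnBitWidth<uint32_t> == 32 && InsnBitWidth<uint64_t> == 64);
static_assert(InsnBitWidth<std::bitset<96>> == 96 &&
              InsnBitWidth<std::bitset<128>> == 128);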
template <typename InsnType>

  const auto SavedBytes = Bytes;

  decodeInstruction(Table, TmpInst, Inst, Address, this, STI);

  Comments << LocalComments;
template <typename InsnType>

  for (const uint8_t *T : {Table1, Table2}) {
  Bytes = Bytes.slice(sizeof(T));

  Bytes = Bytes.slice(8);

  Bytes = Bytes.slice(4);
  return (Hi << 64) | Lo;

  Bytes = Bytes.slice(8);

  Bytes = Bytes.slice(8);
  return (Hi << 64) | Lo;
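// Minimal sketch of the byte-eating pattern used above (an assumption
// reconstructed from the fragments here, not a verbatim copy of the original):
// read a little-endian value of type T from the front of the buffer, then
// drop the consumed bytes so later literal decoders see only what remains.
//
//   template <typename T> static T eatBytes(ArrayRef<uint8_t> &Bytes) {
//     assert(Bytes.size() >= sizeof(T));
//     T Res = support::endian::read<T, llvm::endianness::little>(Bytes.data());
//     Bytes = Bytes.slice(sizeof(T));
//     return Res;
//   }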
void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,

  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());

    if (OpNo >= MI.getNumOperands())

    MCOperand &Op = MI.getOperand(OpNo);

    int64_t Imm = Op.getImm();

    switch (OpDesc.OperandType) {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

    Size = std::min((size_t)4, Bytes_.size());

  Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {

      Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);

      Bytes = Bytes_.slice(0, MaxInstBytesNum);

    } else if (Bytes.size() >= 16 &&
               STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {

      Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() >= 8) {

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&

      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&

      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&

      if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&

    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() >= 4) {

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&

  decodeImmOperands(MI, *MCII);

  else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=

                         AMDGPU::OpName::src2_modifiers);
  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {

                         AMDGPU::OpName::src2_modifiers);

  if (MCII->get(MI.getOpcode()).TSFlags &

    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);

      if (MI.getNumOperands() <= (unsigned)CPolPos) {

                             AMDGPU::OpName::cpol);

        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
  if ((MCII->get(MI.getOpcode()).TSFlags &

      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {

        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto *TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);

        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
    if (OffsetIdx != -1) {
      uint32_t Imm = MI.getOperand(OffsetIdx).getImm();

      if (SignedOffset < 0)

  if (MCII->get(MI.getOpcode()).TSFlags &

        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto *SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);

        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);

        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)

      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;

            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;

      Bytes = Bytes.slice(4 * NSAWords);

  if (MCII->get(MI.getOpcode()).TSFlags &

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,

    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() !=
                           MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));

                           AMDGPU::OpName::vdst_in);

      MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
    auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
    if (Bytes_[0] != ExecEncoding)

  Size = MaxInstBytesNum - Bytes.size();
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {

  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {

  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);

                           AMDGPU::OpName::sdst);
    return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));

        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));

            MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {

          BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
      return MO.setReg(NewReg);
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);

  unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
  unsigned BLGP = MI.getOperand(BlgpIdx).getImm();

  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);

  unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
  unsigned FmtB = MI.getOperand(FmtBIdx).getImm();

  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
                                         bool IsVOP3P = false) {

  unsigned Opc = MI.getOpcode();
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    unsigned Val = MI.getOperand(OpIdx).getImm();

    } else if (J == 0) {
  const unsigned Opc = MI.getOpcode();

      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
      OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,

                      {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,

                      {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,

                      {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,

  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {

    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)

    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(

        AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),

  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());

                       AMDGPU::OpName::src2_modifiers);
  unsigned Opc = MI.getOpcode();

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src1_modifiers);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);
  if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::VGPR0;
  else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::AGPR0;

  assert(BaseReg && "Only vector registers expected");

  return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : AMDGPU::NoRegister;
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);

                             ? AMDGPU::OpName::srsrc
                             : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  if (BaseOpcode->BVH) {

  bool IsAtomic = (VDstIdx != -1);

  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);

        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);

    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;

      if (!IsVSample && AddrSize > 12)

      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {

        IsPartialNSA = true;

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();

    DstSize = (DstSize + 1) / 2;

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)

  if (NewOpcode == -1)

  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);

  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;

  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;

    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {

    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::neg_hi);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src1_modifiers);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);
  assert(HasLiteral && "Should have decoded a literal");

                                          const Twine& ErrMsg) const {

                                                unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())

                      ": unknown register " + Twine(Val));
                                                 unsigned Val) const {

  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:

  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:

  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:

  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:

  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:

  if (Val % (1 << shift)) {

      << ": scalar reg isn't aligned " << Val;
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);

      "Should only decode multiple kimm with VOPD, check VSrc operand types");

      return errOperand(Val, "More than one unique literal is illegal");

    if (Literal64 != Val)
      return errOperand(Val, "More than one unique literal is illegal");

  Literal = Literal64 = Val;
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                               Twine(Bytes.size()));

  assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));

    if (Bytes.size() < 8) {
      return errOperand(0, "cannot read literal64, inst bytes left " +
                               Twine(Bytes.size()));
  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);

      (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
      (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
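// Worked mapping for the expression above (added comment, assuming the usual
// AMDGPU EncValues constants INLINE_INTEGER_C_MIN == 128 and
// INLINE_INTEGER_C_POSITIVE_MAX == 192): encodings 128..192 decode to the
// integers 0..64, and encodings 193..208 decode to -1..-16
// (e.g. 193 -> 192 - 193 == -1).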
    return 0x3fc45f306dc9c882;
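// The constant returned above (added comment): 0x3fc45f306dc9c882 is the
// IEEE-754 double bit pattern of 0.15915494309189532, i.e. 1/(2*pi), the
// AMDGPU inline floating-point constant selected by encoding 248.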
    return VGPR_32RegClassID;

    return VReg_64RegClassID;

    return VReg_96RegClassID;

    return VReg_128RegClassID;

    return VReg_160RegClassID;

    return VReg_192RegClassID;

    return VReg_256RegClassID;

    return VReg_288RegClassID;

    return VReg_320RegClassID;

    return VReg_352RegClassID;

    return VReg_384RegClassID;

    return VReg_512RegClassID;

    return VReg_1024RegClassID;

    return AGPR_32RegClassID;

    return AReg_64RegClassID;

    return AReg_96RegClassID;

    return AReg_128RegClassID;

    return AReg_160RegClassID;

    return AReg_256RegClassID;

    return AReg_288RegClassID;

    return AReg_320RegClassID;

    return AReg_352RegClassID;

    return AReg_384RegClassID;

    return AReg_512RegClassID;

    return AReg_1024RegClassID;

    return SGPR_32RegClassID;

    return SGPR_64RegClassID;

    return SGPR_96RegClassID;

    return SGPR_128RegClassID;

    return SGPR_160RegClassID;

    return SGPR_256RegClassID;

    return SGPR_288RegClassID;

    return SGPR_320RegClassID;

    return SGPR_352RegClassID;

    return SGPR_384RegClassID;

    return SGPR_512RegClassID;

    return TTMP_32RegClassID;

    return TTMP_64RegClassID;

    return TTMP_128RegClassID;

    return TTMP_256RegClassID;

    return TTMP_288RegClassID;

    return TTMP_320RegClassID;

    return TTMP_352RegClassID;

    return TTMP_384RegClassID;

    return TTMP_512RegClassID;
  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
  bool IsAGPR = Val & 512;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {

                                              unsigned Val) const {

  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");

  static_assert(SGPR_MIN == 0);

  if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
      (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
      Val == LITERAL_CONST)

  if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
                                                unsigned Val) const {

      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);

  unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
  Val |= ~XDstReg & 1;
                                            const unsigned Val) const {

  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {

    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {

                              Val - SDWA9EncValues::SRC_VGPR_MIN);

    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {

                               Val - SDWA9EncValues::SRC_SGPR_MIN);

    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {

                               Val - SDWA9EncValues::SRC_TTMP_MIN);

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
        (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))

  if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

  return STI.hasFeature(AMDGPU::FeatureWavefrontSize32) ? decodeSrcOp(32, Val)
  auto [Version, W64, W32, MDP] = Encoding::decode(Imm);

  if (Encoding::encode(Version, W64, W32, MDP) != Imm)

  if (I == Versions.end())

  return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);

  return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);

  return STI.hasFeature(AMDGPU::FeatureGFX11);

  return STI.hasFeature(AMDGPU::FeatureGFX12);

  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
  if (PopCount == 1) {
    S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  } else {
    S << "bits in range ("
      << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
      << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
  } while (0)

#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
             << GET_FIELD(MASK) << '\n';                                       \
  } while (0)
#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  do {                                                                         \
    if (FourByteBuffer & (MASK)) {                                             \
      return createStringError(std::errc::invalid_argument,                    \
                               "kernel descriptor " DESC                       \
                               " reserved %s set" MSG,                         \
                               getBitRangeFromMask((MASK), 0).c_str());        \
    }                                                                          \
  } while (0)

#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
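// Illustrative expansion (added comment; FOO is a hypothetical mask name):
// CHECK_RESERVED_BITS_MSG(FOO, "msg") becomes
// CHECK_RESERVED_BITS_IMPL(FOO, "FOO", ", msg"); if any bit of FOO is set in
// FourByteBuffer, the enclosing decoder returns a createStringError() that
// names the offending bit range via getBitRangeFromMask().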
  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

      (GranulatedWorkitemVGPRCount + 1) *

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';

  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

                            "must be zero on gfx10+");

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';

  KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);

                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);

                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);

                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);

                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);

    PRINT_DIRECTIVE(".amdhsa_fp16_overflow",
                    COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);

                             "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");

                                   COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);

                              "COMPUTE_PGM_RSRC1");

                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);

    PRINT_DIRECTIVE(".amdhsa_memory_ordered",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);

                              "COMPUTE_PGM_RSRC1");

                    COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);

    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);

                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);

                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);

                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);

                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);

                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);

                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);

      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);

                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);

                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);

                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);

                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
    KdStream << Indent << ".amdhsa_accum_offset "
             << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4

    PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);

                             "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");

                             "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
    if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {

                      COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);

          "SHARED_VGPR_COUNT",
          COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);

                               "COMPUTE_PGM_RSRC3", "must be zero on gfx12+");

                                     COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);

                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);

                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);

                                   COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);

                               "COMPUTE_PGM_RSRC3", "must be zero on gfx10");

                             "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

                                     COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);

                               "COMPUTE_PGM_RSRC3",
                               "must be zero on gfx10 or gfx11");

                                     COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);

        "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);

                                     COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);

        "ENABLE_DIDT_THROTTLE",
        COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);

                               "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

                               "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);

                               "COMPUTE_PGM_RSRC3", "must be zero on gfx10");
2511 }
else if (FourByteBuffer) {
2513 std::errc::invalid_argument,
2514 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE

#undef CHECK_RESERVED_BITS_IMPL
#undef CHECK_RESERVED_BITS
#undef CHECK_RESERVED_BITS_MSG
#undef CHECK_RESERVED_BITS_DESC
#undef CHECK_RESERVED_BITS_DESC_MSG
                                        const char *Msg = "") {

      std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",

                                          unsigned WidthInBytes) {

      std::errc::invalid_argument,
      "kernel descriptor reserved bits in range (%u:%u) set",
      (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
  } while (0)
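// Illustrative use (added comment; the directive names themselves are elided
// from the fragments below, so this pairing is shown as an assumption):
// PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
//                 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
// prints the directive name followed by the single-bit field extracted from
// TwoByteBuffer via MASK and MASK##_SHIFT.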
  assert(Bytes.size() == 64);

  switch (Cursor.tell()) {

    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer

    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';

    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size "
             << FourByteBuffer << '\n';

    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)

    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0)

    FourByteBuffer = DE.getU32(Cursor);

    FourByteBuffer = DE.getU32(Cursor);

    FourByteBuffer = DE.getU32(Cursor);

    TwoByteBuffer = DE.getU16(Cursor);

                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);

                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);

                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);

                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);

                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);

                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);

                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

                      KERNEL_CODE_PROPERTY_USES_CU_STORES);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)

        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {

          KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,

                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

                    KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {

    TwoByteBuffer = DE.getU16(Cursor);
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {

                      KERNARG_PRELOAD_SPEC_LENGTH);

    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {

                      KERNARG_PRELOAD_SPEC_OFFSET);

    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)

#undef PRINT_DIRECTIVE
  if (Bytes.size() != 64 || KdAddress % 64 != 0)

                             "kernel descriptor must be 64-byte aligned");

  EnableWavefrontSize32 =

      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

  KdStream << ".amdhsa_kernel " << KdName << '\n';

  while (C && C.tell() < Bytes.size()) {

  KdStream << ".end_amdhsa_kernel\n";

                             "code object v2 is not supported");
const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,

  MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);

  if (!Valid || Res != Val)
    Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);

  const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  if (Result != Symbols->end()) {
    auto *Sym = Ctx.getOrCreateSymbol(Result->Name);

  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));

                                       std::unique_ptr<MCRelocationInfo> &&RelInfo) {