40#define DEBUG_TYPE "amdgpu-disassembler"
43 (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
44 : AMDGPU::EncValues::SGPR_MAX_SI)
51 MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
52 CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
58 createConstantSymbolExpr(Symbol, Code);
60 UCVersionW64Expr = createConstantSymbolExpr(
"UC_VERSION_W64_BIT", 0x2000);
61 UCVersionW32Expr = createConstantSymbolExpr(
"UC_VERSION_W32_BIT", 0x4000);
62 UCVersionMDPExpr = createConstantSymbolExpr(
"UC_VERSION_MDP_BIT", 0x8000);
78 AMDGPU::OpName
Name) {
79 int OpIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
Name);
82 std::advance(
I, OpIdx);
94 int64_t
Offset = SignExtend64<16>(Imm) * 4 + 4 +
Addr;
96 if (DAsm->tryAddingSymbolicOperand(Inst,
Offset,
Addr,
true, 2, 2, 0))
105 if (DAsm->isGFX12Plus()) {
106 Offset = SignExtend64<24>(Imm);
107 }
else if (DAsm->isVI()) {
110 Offset = SignExtend64<21>(Imm);
118 return addOperand(Inst, DAsm->decodeBoolReg(Val));
125 return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
131 return addOperand(Inst, DAsm->decodeDpp8FI(Val));
134#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
135 static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
137 const MCDisassembler *Decoder) { \
138 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
139 return addOperand(Inst, DAsm->DecoderName(Imm)); \
144#define DECODE_OPERAND_REG_8(RegClass) \
145 static DecodeStatus Decode##RegClass##RegisterClass( \
146 MCInst &Inst, unsigned Imm, uint64_t , \
147 const MCDisassembler *Decoder) { \
148 assert(Imm < (1 << 8) && "8-bit encoding"); \
149 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
151 Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
154#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral, \
156 static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t , \
157 const MCDisassembler *Decoder) { \
158 assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
159 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
160 return addOperand(Inst, \
161 DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm, \
162 MandatoryLiteral, ImmWidth)); \
167 unsigned Imm,
unsigned EncImm,
168 bool MandatoryLiteral,
unsigned ImmWidth,
171 assert(Imm < (1U << EncSize) &&
"Operand doesn't fit encoding!");
173 return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
179#define DECODE_OPERAND_REG_7(RegClass, OpWidth) \
180 DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
186template <AMDGPUDisassembler::OpW
idthTy OpW
idth>
194template <AMDGPUDisassembler::OpW
idthTy OpW
idth>
198 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm,
false, 0,
205template <AMDGPUDisassembler::OpW
idthTy OpW
idth>
208 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512,
false, 0,
214template <AMDGPUDisassembler::OpW
idthTy OpW
idth>
218 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm,
false, 0,
232 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm,
false, ImmWidth,
243 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512,
false, ImmWidth,
252 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm,
true, ImmWidth,
297 assert(isUInt<10>(Imm) &&
"10-bit encoding expected");
298 assert((Imm & (1 << 8)) == 0 &&
"Imm{8} should not be used");
300 bool IsHi = Imm & (1 << 9);
301 unsigned RegIdx = Imm & 0xff;
303 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
309 assert(isUInt<8>(Imm) &&
"8-bit encoding expected");
311 bool IsHi = Imm & (1 << 7);
312 unsigned RegIdx = Imm & 0x7f;
314 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
322 assert(isUInt<9>(Imm) &&
"9-bit encoding expected");
326 bool IsHi = Imm & (1 << 7);
327 unsigned RegIdx = Imm & 0x7f;
328 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
330 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
331 OpWidth, Imm & 0xFF,
false, ImmWidth,
342 assert(isUInt<9>(Imm) &&
"9-bit encoding expected");
345 bool IsHi = Imm & (1 << 7);
346 unsigned RegIdx = Imm & 0x7f;
347 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
349 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
350 OpWidth, Imm & 0xFF,
true, ImmWidth,
359 assert(isUInt<10>(Imm) &&
"10-bit encoding expected");
363 bool IsHi = Imm & (1 << 9);
364 unsigned RegIdx = Imm & 0xff;
365 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
367 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
368 OpWidth, Imm & 0xFF,
false, ImmWidth,
375 assert(isUInt<10>(Imm) &&
"10-bit encoding expected");
380 bool IsHi = Imm & (1 << 9);
381 unsigned RegIdx = Imm & 0xff;
382 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
389 return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
395 return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
408 auto Reg = Sub ? Sub :
Op.getReg();
409 return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
416 if (!DAsm->isGFX90A()) {
425 uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
427 ? AMDGPU::OpName::data0
428 : AMDGPU::OpName::vdata;
430 int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataName);
432 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
438 int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
444 return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
447template <AMDGPUDisassembler::OpW
idthTy Opw>
457 assert(Imm < (1 << 9) &&
"9-bit encoding");
464#define DECODE_SDWA(DecName) \
465DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
475 return addOperand(Inst, DAsm->decodeVersionImm(Imm));
478#include "AMDGPUGenDisassemblerTables.inc"
487 support::endian::read<T, llvm::endianness::little>(Bytes.
data());
488 Bytes = Bytes.
slice(
sizeof(
T));
495 support::endian::read<uint64_t, llvm::endianness::little>(Bytes.
data());
496 Bytes = Bytes.
slice(8);
498 support::endian::read<uint32_t, llvm::endianness::little>(Bytes.
data());
499 Bytes = Bytes.
slice(4);
506 support::endian::read<uint64_t, llvm::endianness::little>(Bytes.
data());
507 Bytes = Bytes.
slice(8);
509 support::endian::read<uint64_t, llvm::endianness::little>(Bytes.
data());
510 Bytes = Bytes.
slice(8);
518 unsigned MaxInstBytesNum = std::min((
size_t)TargetMaxInstBytes, Bytes_.
size());
519 Bytes = Bytes_.
slice(0, MaxInstBytesNum);
523 Size = std::min((
size_t)4, Bytes_.
size());
549 Bytes = Bytes_.
slice(0, MaxInstBytesNum);
551 }
else if (Bytes.
size() >= 16 &&
558 Bytes = Bytes_.
slice(0, MaxInstBytesNum);
561 if (Bytes.
size() >= 8) {
562 const uint64_t QW = eatBytes<uint64_t>(Bytes);
620 Bytes = Bytes_.
slice(0, MaxInstBytesNum);
624 if (Bytes.
size() >= 4) {
625 const uint32_t DW = eatBytes<uint32_t>(Bytes);
676 else if (AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::dpp8) !=
688 AMDGPU::OpName::src2_modifiers);
691 if (
MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
692 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
695 AMDGPU::OpName::src2_modifiers);
703 if (MCII->get(
MI.getOpcode()).TSFlags &
705 int CPolPos = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
706 AMDGPU::OpName::cpol);
711 if (
MI.getNumOperands() <= (
unsigned)CPolPos) {
713 AMDGPU::OpName::cpol);
715 MI.getOperand(CPolPos).setImm(
MI.getOperand(CPolPos).getImm() | CPol);
720 if ((MCII->get(
MI.getOpcode()).TSFlags &
725 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::tfe);
726 if (TFEOpIdx != -1) {
727 auto *TFEIter =
MI.begin();
728 std::advance(TFEIter, TFEOpIdx);
733 if (MCII->get(
MI.getOpcode()).TSFlags &
736 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::swz);
737 if (SWZOpIdx != -1) {
738 auto *SWZIter =
MI.begin();
739 std::advance(SWZIter, SWZOpIdx);
746 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vaddr0);
748 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
749 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
750 if (VAddr0Idx >= 0 && NSAArgs > 0) {
751 unsigned NSAWords = (NSAArgs + 3) / 4;
752 if (Bytes.
size() < 4 * NSAWords)
754 for (
unsigned i = 0; i < NSAArgs; ++i) {
755 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
757 MCII->get(
MI.getOpcode()).operands()[VAddrIdx].RegClass;
760 Bytes = Bytes.
slice(4 * NSAWords);
766 if (MCII->get(
MI.getOpcode()).TSFlags &
782 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
783 AMDGPU::OpName::vdst_in);
784 if (VDstIn_Idx != -1) {
785 int Tied = MCII->get(
MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
787 if (Tied != -1 && (
MI.getNumOperands() <= (
unsigned)VDstIn_Idx ||
788 !
MI.getOperand(VDstIn_Idx).isReg() ||
789 MI.getOperand(VDstIn_Idx).getReg() !=
MI.getOperand(Tied).getReg())) {
790 if (
MI.getNumOperands() > (
unsigned)VDstIn_Idx)
791 MI.erase(&
MI.getOperand(VDstIn_Idx));
794 AMDGPU::OpName::vdst_in);
799 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::imm);
801 if (ImmLitIdx != -1 && !IsSOPK)
804 Size = MaxInstBytesNum - Bytes.
size();
819 if (
MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
820 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
821 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
822 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
823 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
824 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
825 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
826 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
827 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
828 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
829 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
830 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
831 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
832 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
833 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
834 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
848 int SDst = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::sdst);
852 AMDGPU::OpName::sdst);
866 return MO.
setReg(
MRI.getSubReg(MO.
getReg(), AMDGPU::sub0_sub1_sub2_sub3));
869 MRI.getSubReg(MO.
getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
885 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::blgp);
890 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::cbsz);
892 unsigned CBSZ =
MI.getOperand(CbszIdx).getImm();
893 unsigned BLGP =
MI.getOperand(BlgpIdx).getImm();
897 if (!AdjustedRegClassOpcode ||
898 AdjustedRegClassOpcode->
Opcode ==
MI.getOpcode())
901 MI.setOpcode(AdjustedRegClassOpcode->
Opcode);
903 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
905 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src1);
914 unsigned OpSelHi = 0;
923 bool IsVOP3P =
false) {
925 unsigned Opc =
MI.getOpcode();
926 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
927 AMDGPU::OpName::src1_modifiers,
928 AMDGPU::OpName::src2_modifiers};
929 for (
int J = 0; J < 3; ++J) {
930 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
934 unsigned Val =
MI.getOperand(OpIdx).getImm();
952 const unsigned Opc =
MI.getOpcode();
955 constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
956 OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
958 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
960 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
962 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
964 for (
const auto &[
OpName, OpModsName, OpSelMask] : OpAndOpMods) {
965 int OpIdx = AMDGPU::getNamedOperandIdx(Opc,
OpName);
966 int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
967 if (OpIdx == -1 || OpModsIdx == -1)
975 const MCOperand &OpMods =
MI.getOperand(OpModsIdx);
976 unsigned ModVal = OpMods.
getImm();
977 if (ModVal & OpSelMask) {
987 constexpr int DST_IDX = 0;
988 auto Opcode =
MI.getOpcode();
989 const auto &
Desc = MCII->get(Opcode);
990 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
992 if (OldIdx != -1 &&
Desc.getOperandConstraint(
996 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
1007 assert(
MI.getNumOperands() + 1 < MCII->get(
MI.getOpcode()).getNumOperands());
1010 AMDGPU::OpName::src2_modifiers);
1014 unsigned Opc =
MI.getOpcode();
1017 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vdst_in);
1018 if (VDstInIdx != -1)
1021 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1022 if (
MI.getNumOperands() < DescNumOps &&
1027 AMDGPU::OpName::op_sel);
1030 if (
MI.getNumOperands() < DescNumOps &&
1033 AMDGPU::OpName::src0_modifiers);
1035 if (
MI.getNumOperands() < DescNumOps &&
1038 AMDGPU::OpName::src1_modifiers);
1046 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vdst_in);
1047 if (VDstInIdx != -1)
1050 unsigned Opc =
MI.getOpcode();
1051 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1052 if (
MI.getNumOperands() < DescNumOps &&
1056 AMDGPU::OpName::op_sel);
1064 auto TSFlags = MCII->get(
MI.getOpcode()).TSFlags;
1066 int VDstIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1067 AMDGPU::OpName::vdst);
1069 int VDataIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1070 AMDGPU::OpName::vdata);
1072 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vaddr0);
1074 ? AMDGPU::OpName::srsrc
1075 : AMDGPU::OpName::rsrc;
1076 int RsrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), RsrcOpName);
1077 int DMaskIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1078 AMDGPU::OpName::dmask);
1080 int TFEIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1081 AMDGPU::OpName::tfe);
1082 int D16Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1083 AMDGPU::OpName::d16);
1090 if (BaseOpcode->
BVH) {
1096 bool IsAtomic = (VDstIdx != -1);
1100 bool IsPartialNSA =
false;
1101 unsigned AddrSize =
Info->VAddrDwords;
1105 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::dim);
1107 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::a16);
1110 const bool IsA16 = (A16Idx != -1 &&
MI.getOperand(A16Idx).getImm());
1117 IsNSA =
Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
1118 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1119 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
1121 if (!IsVSample && AddrSize > 12)
1124 if (AddrSize >
Info->VAddrDwords) {
1130 IsPartialNSA =
true;
1135 unsigned DMask =
MI.getOperand(DMaskIdx).getImm() & 0xf;
1136 unsigned DstSize = IsGather4 ? 4 : std::max(
llvm::popcount(DMask), 1);
1138 bool D16 = D16Idx >= 0 &&
MI.getOperand(D16Idx).getImm();
1140 DstSize = (DstSize + 1) / 2;
1143 if (TFEIdx != -1 &&
MI.getOperand(TFEIdx).getImm())
1146 if (DstSize ==
Info->VDataDwords && AddrSize ==
Info->VAddrDwords)
1151 if (NewOpcode == -1)
1156 if (DstSize !=
Info->VDataDwords) {
1157 auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;
1162 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1175 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1177 if (
STI.
hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1178 AddrSize !=
Info->VAddrDwords) {
1179 MCRegister VAddrSA =
MI.getOperand(VAddrSAIdx).getReg();
1181 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1183 auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
1190 MI.setOpcode(NewOpcode);
1192 if (NewVdata != AMDGPU::NoRegister) {
1204 assert(AddrSize <= Info->VAddrDwords);
1205 MI.erase(
MI.begin() + VAddr0Idx + AddrSize,
1206 MI.begin() + VAddr0Idx +
Info->VAddrDwords);
1214 unsigned Opc =
MI.getOpcode();
1215 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1218 if (
MI.getNumOperands() < DescNumOps &&
1222 if (
MI.getNumOperands() < DescNumOps &&
1225 AMDGPU::OpName::op_sel);
1226 if (
MI.getNumOperands() < DescNumOps &&
1229 AMDGPU::OpName::op_sel_hi);
1230 if (
MI.getNumOperands() < DescNumOps &&
1233 AMDGPU::OpName::neg_lo);
1234 if (
MI.getNumOperands() < DescNumOps &&
1237 AMDGPU::OpName::neg_hi);
1242 unsigned Opc =
MI.getOpcode();
1243 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1245 if (
MI.getNumOperands() < DescNumOps &&
1249 if (
MI.getNumOperands() < DescNumOps &&
1252 AMDGPU::OpName::src0_modifiers);
1254 if (
MI.getNumOperands() < DescNumOps &&
1257 AMDGPU::OpName::src1_modifiers);
1261 unsigned Opc =
MI.getOpcode();
1262 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1266 if (
MI.getNumOperands() < DescNumOps &&
1270 AMDGPU::OpName::op_sel);
1275 assert(HasLiteral &&
"Should have decoded a literal");
1277 unsigned DescNumOps =
Desc.getNumOperands();
1279 AMDGPU::OpName::immDeferred);
1280 assert(DescNumOps ==
MI.getNumOperands());
1281 for (
unsigned I = 0;
I < DescNumOps; ++
I) {
1282 auto &
Op =
MI.getOperand(
I);
1283 auto OpType =
Desc.operands()[
I].OperandType;
1299 const Twine& ErrMsg)
const {
1314 unsigned Val)
const {
1315 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1316 if (Val >= RegCl.getNumRegs())
1318 ": unknown register " +
Twine(Val));
1324 unsigned Val)
const {
1328 switch (SRegClassID) {
1329 case AMDGPU::SGPR_32RegClassID:
1330 case AMDGPU::TTMP_32RegClassID:
1332 case AMDGPU::SGPR_64RegClassID:
1333 case AMDGPU::TTMP_64RegClassID:
1336 case AMDGPU::SGPR_96RegClassID:
1337 case AMDGPU::TTMP_96RegClassID:
1338 case AMDGPU::SGPR_128RegClassID:
1339 case AMDGPU::TTMP_128RegClassID:
1342 case AMDGPU::SGPR_256RegClassID:
1343 case AMDGPU::TTMP_256RegClassID:
1346 case AMDGPU::SGPR_288RegClassID:
1347 case AMDGPU::TTMP_288RegClassID:
1348 case AMDGPU::SGPR_320RegClassID:
1349 case AMDGPU::TTMP_320RegClassID:
1350 case AMDGPU::SGPR_352RegClassID:
1351 case AMDGPU::TTMP_352RegClassID:
1352 case AMDGPU::SGPR_384RegClassID:
1353 case AMDGPU::TTMP_384RegClassID:
1354 case AMDGPU::SGPR_512RegClassID:
1355 case AMDGPU::TTMP_512RegClassID:
1364 if (Val % (1 << shift)) {
1366 <<
": scalar reg isn't aligned " << Val;
1374 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1384 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1386 return errOperand(Val,
"More than one unique literal is illegal");
1398 if (Bytes.
size() < 4) {
1399 return errOperand(0,
"cannot read literal, inst bytes left " +
1403 Literal = Literal64 = eatBytes<uint32_t>(Bytes);
1411 using namespace AMDGPU::EncValues;
1413 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1415 (
static_cast<int64_t
>(Imm) - INLINE_INTEGER_C_MIN) :
1416 (INLINE_INTEGER_C_POSITIVE_MAX -
static_cast<int64_t
>(Imm)));
1423 return llvm::bit_cast<uint32_t>(0.5f);
1425 return llvm::bit_cast<uint32_t>(-0.5f);
1427 return llvm::bit_cast<uint32_t>(1.0f);
1429 return llvm::bit_cast<uint32_t>(-1.0f);
1431 return llvm::bit_cast<uint32_t>(2.0f);
1433 return llvm::bit_cast<uint32_t>(-2.0f);
1435 return llvm::bit_cast<uint32_t>(4.0f);
1437 return llvm::bit_cast<uint32_t>(-4.0f);
1448 return llvm::bit_cast<uint64_t>(0.5);
1450 return llvm::bit_cast<uint64_t>(-0.5);
1452 return llvm::bit_cast<uint64_t>(1.0);
1454 return llvm::bit_cast<uint64_t>(-1.0);
1456 return llvm::bit_cast<uint64_t>(2.0);
1458 return llvm::bit_cast<uint64_t>(-2.0);
1460 return llvm::bit_cast<uint64_t>(4.0);
1462 return llvm::bit_cast<uint64_t>(-4.0);
1464 return 0x3fc45f306dc9c882;
1548 using namespace AMDGPU;
1556 return VGPR_32RegClassID;
1558 case OPWV232:
return VReg_64RegClassID;
1559 case OPW96:
return VReg_96RegClassID;
1560 case OPW128:
return VReg_128RegClassID;
1561 case OPW192:
return VReg_192RegClassID;
1562 case OPW160:
return VReg_160RegClassID;
1563 case OPW256:
return VReg_256RegClassID;
1564 case OPW288:
return VReg_288RegClassID;
1565 case OPW320:
return VReg_320RegClassID;
1566 case OPW352:
return VReg_352RegClassID;
1567 case OPW384:
return VReg_384RegClassID;
1568 case OPW512:
return VReg_512RegClassID;
1569 case OPW1024:
return VReg_1024RegClassID;
1574 using namespace AMDGPU;
1582 return AGPR_32RegClassID;
1584 case OPWV232:
return AReg_64RegClassID;
1585 case OPW96:
return AReg_96RegClassID;
1586 case OPW128:
return AReg_128RegClassID;
1587 case OPW160:
return AReg_160RegClassID;
1588 case OPW256:
return AReg_256RegClassID;
1589 case OPW288:
return AReg_288RegClassID;
1590 case OPW320:
return AReg_320RegClassID;
1591 case OPW352:
return AReg_352RegClassID;
1592 case OPW384:
return AReg_384RegClassID;
1593 case OPW512:
return AReg_512RegClassID;
1594 case OPW1024:
return AReg_1024RegClassID;
1600 using namespace AMDGPU;
1608 return SGPR_32RegClassID;
1610 case OPWV232:
return SGPR_64RegClassID;
1611 case OPW96:
return SGPR_96RegClassID;
1612 case OPW128:
return SGPR_128RegClassID;
1613 case OPW160:
return SGPR_160RegClassID;
1614 case OPW256:
return SGPR_256RegClassID;
1615 case OPW288:
return SGPR_288RegClassID;
1616 case OPW320:
return SGPR_320RegClassID;
1617 case OPW352:
return SGPR_352RegClassID;
1618 case OPW384:
return SGPR_384RegClassID;
1619 case OPW512:
return SGPR_512RegClassID;
1624 using namespace AMDGPU;
1632 return TTMP_32RegClassID;
1634 case OPWV232:
return TTMP_64RegClassID;
1635 case OPW128:
return TTMP_128RegClassID;
1636 case OPW256:
return TTMP_256RegClassID;
1637 case OPW288:
return TTMP_288RegClassID;
1638 case OPW320:
return TTMP_320RegClassID;
1639 case OPW352:
return TTMP_352RegClassID;
1640 case OPW384:
return TTMP_384RegClassID;
1641 case OPW512:
return TTMP_512RegClassID;
1646 using namespace AMDGPU::EncValues;
1648 unsigned TTmpMin =
isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1649 unsigned TTmpMax =
isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1651 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1655 bool MandatoryLiteral,
1658 using namespace AMDGPU::EncValues;
1662 bool IsAGPR = Val & 512;
1665 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1675 bool MandatoryLiteral,
unsigned ImmWidth,
1679 assert(Val < (1 << 8) &&
"9-bit Src encoding when Val{8} is 0");
1680 using namespace AMDGPU::EncValues;
1684 static_assert(SGPR_MIN == 0);
1693 if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
1696 if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
1699 if (Val == LITERAL_CONST) {
1700 if (MandatoryLiteral)
1727 unsigned Val)
const {
1729 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::vdstX);
1733 Val |= ~XDstReg & 1;
1739 using namespace AMDGPU;
1775 using namespace AMDGPU;
1806 using namespace AMDGPU;
1827 using namespace AMDGPU::SDWA;
1828 using namespace AMDGPU::EncValues;
1834 if (
int(SDWA9EncValues::SRC_VGPR_MIN) <=
int(Val) &&
1835 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
1837 Val - SDWA9EncValues::SRC_VGPR_MIN);
1839 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
1840 Val <= (
isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
1841 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
1843 Val - SDWA9EncValues::SRC_SGPR_MIN);
1845 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
1846 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
1848 Val - SDWA9EncValues::SRC_TTMP_MIN);
1851 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
1853 if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
1856 if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
1875 using namespace AMDGPU::SDWA;
1879 "SDWAVopcDst should be present only on GFX9+");
1881 bool IsWave32 =
STI.
hasFeature(AMDGPU::FeatureWavefrontSize32);
1883 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
1884 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
1922 auto [
Version, W64, W32, MDP] = Encoding::decode(Imm);
1925 if (Encoding::encode(
Version, W64, W32, MDP) != Imm)
1935 if (
I == Versions.end())
1985 return STI.
hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2007 if (PopCount == 1) {
2008 S <<
"bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) <<
')';
2010 S <<
"bits in range ("
2011 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) <<
':'
2012 << (TrailingZeros + BaseBytes * CHAR_BIT) <<
')';
2018#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2019#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2021 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2023#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2025 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2026 << GET_FIELD(MASK) << '\n'; \
2029#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2031 if (FourByteBuffer & (MASK)) { \
2032 return createStringError(std::errc::invalid_argument, \
2033 "kernel descriptor " DESC \
2034 " reserved %s set" MSG, \
2035 getBitRangeFromMask((MASK), 0).c_str()); \
2039#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2040#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2041 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2042#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2043 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2044#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2045 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2050 using namespace amdhsa;
2058 uint32_t GranulatedWorkitemVGPRCount =
2059 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2062 (GranulatedWorkitemVGPRCount + 1) *
2065 KdStream << Indent <<
".amdhsa_next_free_vgpr " << NextFreeVGPR <<
'\n';
2086 uint32_t GranulatedWavefrontSGPRCount =
2087 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2091 "must be zero on gfx10+");
2093 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2096 KdStream << Indent <<
".amdhsa_reserve_vcc " << 0 <<
'\n';
2098 KdStream << Indent <<
".amdhsa_reserve_flat_scratch " << 0 <<
'\n';
2099 KdStream << Indent <<
".amdhsa_reserve_xnack_mask " << 0 <<
'\n';
2100 KdStream << Indent <<
".amdhsa_next_free_sgpr " << NextFreeSGPR <<
"\n";
2105 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2107 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2109 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2111 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2117 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2123 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2129 PRINT_DIRECTIVE(
".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2133 "COMPUTE_PGM_RSRC1",
"must be zero pre-gfx9");
2139 "COMPUTE_PGM_RSRC1",
"must be zero pre-gfx10");
2143 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2144 PRINT_DIRECTIVE(
".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2145 PRINT_DIRECTIVE(
".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2150 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2158 using namespace amdhsa;
2162 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2164 PRINT_DIRECTIVE(
".amdhsa_system_sgpr_private_segment_wavefront_offset",
2165 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2167 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2169 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2171 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2173 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2175 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2182 ".amdhsa_exception_fp_ieee_invalid_op",
2183 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2185 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2187 ".amdhsa_exception_fp_ieee_div_zero",
2188 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2190 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2192 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2194 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2196 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2206 using namespace amdhsa;
2209 KdStream << Indent <<
".amdhsa_accum_offset "
2210 << (
GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2213 PRINT_DIRECTIVE(
".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2216 "COMPUTE_PGM_RSRC3",
"must be zero on gfx90a");
2218 "COMPUTE_PGM_RSRC3",
"must be zero on gfx90a");
2222 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2224 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2227 "SHARED_VGPR_COUNT",
2228 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2232 "COMPUTE_PGM_RSRC3",
2233 "must be zero on gfx12+");
2239 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2241 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2243 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2246 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2249 "COMPUTE_PGM_RSRC3",
2250 "must be zero on gfx10");
2255 "COMPUTE_PGM_RSRC3",
"must be zero on gfx10+");
2260 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2263 "COMPUTE_PGM_RSRC3",
2264 "must be zero on gfx10 or gfx11");
2269 "COMPUTE_PGM_RSRC3",
"must be zero on gfx10+");
2274 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2277 "COMPUTE_PGM_RSRC3",
2278 "must be zero on gfx10");
2280 }
else if (FourByteBuffer) {
2282 std::errc::invalid_argument,
2283 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2287#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2288#undef PRINT_DIRECTIVE
2290#undef CHECK_RESERVED_BITS_IMPL
2291#undef CHECK_RESERVED_BITS
2292#undef CHECK_RESERVED_BITS_MSG
2293#undef CHECK_RESERVED_BITS_DESC
2294#undef CHECK_RESERVED_BITS_DESC_MSG
2299 const char *Msg =
"") {
2301 std::errc::invalid_argument,
"kernel descriptor reserved %s set%s%s",
2308 unsigned WidthInBytes) {
2312 std::errc::invalid_argument,
2313 "kernel descriptor reserved bits in range (%u:%u) set",
2314 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2320#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2322 KdStream << Indent << DIRECTIVE " " \
2323 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2335 switch (Cursor.
tell()) {
2337 FourByteBuffer = DE.
getU32(Cursor);
2338 KdStream << Indent <<
".amdhsa_group_segment_fixed_size " << FourByteBuffer
2343 FourByteBuffer = DE.
getU32(Cursor);
2344 KdStream << Indent <<
".amdhsa_private_segment_fixed_size "
2345 << FourByteBuffer <<
'\n';
2349 FourByteBuffer = DE.
getU32(Cursor);
2350 KdStream << Indent <<
".amdhsa_kernarg_size "
2351 << FourByteBuffer <<
'\n';
2356 ReservedBytes = DE.
getBytes(Cursor, 4);
2357 for (
int I = 0;
I < 4; ++
I) {
2358 if (ReservedBytes[
I] != 0)
2372 ReservedBytes = DE.
getBytes(Cursor, 20);
2373 for (
int I = 0;
I < 20; ++
I) {
2374 if (ReservedBytes[
I] != 0)
2380 FourByteBuffer = DE.
getU32(Cursor);
2384 FourByteBuffer = DE.
getU32(Cursor);
2388 FourByteBuffer = DE.
getU32(Cursor);
2392 using namespace amdhsa;
2393 TwoByteBuffer = DE.
getU16(Cursor);
2397 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2399 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2401 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2403 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2405 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2408 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2410 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2412 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2418 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2420 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2425 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2430 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2432 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2440 using namespace amdhsa;
2441 TwoByteBuffer = DE.
getU16(Cursor);
2442 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2444 KERNARG_PRELOAD_SPEC_LENGTH);
2447 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2449 KERNARG_PRELOAD_SPEC_OFFSET);
2455 ReservedBytes = DE.
getBytes(Cursor, 4);
2456 for (
int I = 0;
I < 4; ++
I) {
2457 if (ReservedBytes[
I] != 0)
2466#undef PRINT_DIRECTIVE
2473 if (Bytes.
size() != 64 || KdAddress % 64 != 0)
2475 "kernel descriptor must be 64-byte aligned");
2486 EnableWavefrontSize32 =
2488 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2493 KdStream <<
".amdhsa_kernel " << KdName <<
'\n';
2496 while (
C &&
C.tell() < Bytes.
size()) {
2504 KdStream <<
".end_amdhsa_kernel\n";
2523 "code object v2 is not supported");
2536const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(
StringRef Id,
2542 if (!
Sym->isVariable()) {
2546 bool Valid =
Sym->getVariableValue()->evaluateAsAbsolute(Res);
2547 if (!Valid || Res != Val)
2575 if (Result != Symbols->end()) {
2582 ReferencedAddresses.push_back(
static_cast<uint64_t>(
Value));
2601 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
unsigned const MachineRegisterInfo * MRI
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)
static VOPModifiers collectVOPModifiers(const MCInst &MI, bool IsVOP3P=false)
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, AMDGPU::OpName Name)
static SmallString< 32 > getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes)
Print a string describing the reserved bit range specified by Mask with offset BaseBytes for use in e...
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecoderUInt128 eat12Bytes(ArrayRef< uint8_t > &Bytes)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)
static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema)
static int64_t getInlineImmValBF16(unsigned Imm)
#define DECODE_SDWA(DecName)
static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define DECODE_OPERAND_REG_8(RegClass)
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize, AMDGPUDisassembler::OpWidthTy OpWidth, unsigned Imm, unsigned EncImm, bool MandatoryLiteral, unsigned ImmWidth, AMDGPU::OperandSemantics Sema, const MCDisassembler *Decoder)
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)
static DecoderUInt128 eat16Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_VSrcT16_Lo128_Deferred(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int64_t getInlineImmVal32(unsigned Imm)
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
#define CHECK_RESERVED_BITS(MASK)
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int64_t getInlineImmVal64(unsigned Imm)
static T eatBytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static MCDisassembler * createAMDGPUDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler()
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, AMDGPUDisassembler::OpWidthTy Opw, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, uint64_t Addr, const void *Decoder)
static MCSymbolizer * createAMDGPUSymbolizer(const Triple &, LLVMOpInfoCallback, LLVMSymbolLookupCallback, void *DisInfo, MCContext *Ctx, std::unique_ptr< MCRelocationInfo > &&RelInfo)
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static int64_t getInlineImmValF16(unsigned Imm)
static Error createReservedKDBytesError(unsigned BaseInBytes, unsigned WidthInBytes)
Create an error object to return from onSymbolStart for reserved kernel descriptor bytes being set.
static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes, const char *Msg="")
Create an error object to return from onSymbolStart for reserved kernel descriptor bits being set.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI, MCOperand &MO, uint8_t NumRegs)
Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the appropriate subregister fo...
This file contains the declarations for the AMDGPU ISA disassembler.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_GET(SRC, MSK)
Analysis containing CSE Info
#define LLVM_EXTERNAL_VISIBILITY
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
void convertVOPC64DPPInst(MCInst &MI) const
bool hasKernargPreload() const
void convertEXPInst(MCInst &MI) const
MCOperand createRegOperand(unsigned int RegId) const
MCOperand decodeSpecialReg64(unsigned Val) const
const char * getRegClassName(unsigned RegClassID) const
Expected< bool > decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC1.
Expected< bool > decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, ArrayRef< uint8_t > Bytes, raw_string_ostream &KdStream) const
void convertVOPCDPPInst(MCInst &MI) const
unsigned getVgprClassId(const OpWidthTy Width) const
unsigned getAgprClassId(const OpWidthTy Width) const
MCOperand decodeSpecialReg96Plus(unsigned Val) const
MCOperand decodeSDWASrc32(unsigned Val) const
void setABIVersion(unsigned Version) override
ELF-specific, set the ABI version from the object header.
Expected< bool > decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC2.
MCOperand decodeDpp8FI(unsigned Val) const
void convertMacDPPInst(MCInst &MI) const
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const
MCOperand decodeBoolReg(unsigned Val) const
void convertDPP8Inst(MCInst &MI) const
MCOperand createVGPR16Operand(unsigned RegIdx, bool IsHi) const
MCOperand errOperand(unsigned V, const Twine &ErrMsg) const
MCOperand decodeVersionImm(unsigned Imm) const
MCOperand decodeSDWASrc(const OpWidthTy Width, unsigned Val, unsigned ImmWidth, AMDGPU::OperandSemantics Sema) const
Expected< bool > decodeKernelDescriptor(StringRef KdName, ArrayRef< uint8_t > Bytes, uint64_t KdAddress) const
MCOperand decodeSplitBarrier(unsigned Val) const
void convertVOP3DPPInst(MCInst &MI) const
void convertTrue16OpSel(MCInst &MI) const
void convertFMAanyK(MCInst &MI, int ImmLitIdx) const
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const
MCOperand decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val, bool MandatoryLiteral=false, unsigned ImmWidth=0, AMDGPU::OperandSemantics Sema=AMDGPU::OperandSemantics::INT) const
Expected< bool > decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC3.
static MCOperand decodeFPImmed(unsigned ImmWidth, unsigned Imm, AMDGPU::OperandSemantics Sema)
MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val, bool MandatoryLiteral=false, unsigned ImmWidth=0, AMDGPU::OperandSemantics Sema=AMDGPU::OperandSemantics::INT) const
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII)
MCOperand decodeSpecialReg32(unsigned Val) const
MCOperand decodeLiteralConstant(bool ExtendFP64) const
MCOperand decodeSDWAVopcDst(unsigned Val) const
void convertVINTERPInst(MCInst &MI) const
void convertSDWAInst(MCInst &MI) const
DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst, uint64_t Address, raw_ostream &Comments) const
unsigned getSgprClassId(const OpWidthTy Width) const
static MCOperand decodeIntImmed(unsigned Imm)
void convertMAIInst(MCInst &MI) const
f8f6f4 instructions have different pseudos depending on the used formats.
bool hasArchitectedFlatScratch() const
DecodeStatus getInstruction(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CS) const override
Returns the disassembly of a single instruction.
unsigned getTtmpClassId(const OpWidthTy Width) const
void convertMIMGInst(MCInst &MI) const
bool isMacDPP(MCInst &MI) const
int getTTmpIdx(unsigned Val) const
void convertVOP3PDPPInst(MCInst &MI) const
MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const
MCOperand decodeSDWASrc16(unsigned Val) const
Expected< bool > onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address) const override
Used to perform separate target specific disassembly for a particular symbol.
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override
Try to add a symbolic operand instead of Value to the MCInst.
void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) override
Try to add a comment on the PC-relative load.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
This class represents an Operation in the Expression.
Lightweight error class with error context and mandatory checking.
Tagged union holding either a T or a Error.
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
const MCRegisterInfo * getRegisterInfo() const
void reportWarning(SMLoc L, const Twine &Msg)
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Superclass for all disassemblers.
MCContext & getContext() const
const MCSubtargetInfo & STI
raw_ostream * CommentStream
DecodeStatus
Ternary decode status.
Base class for the full range of assembler expressions which are needed for parsing.
Instances of this class represent a single low-level machine instruction.
unsigned getNumOperands() const
unsigned getOpcode() const
void addOperand(const MCOperand Op)
const MCOperand & getOperand(unsigned i) const
Describe properties that are true of each instruction in the target description file.
Interface to description of machine instruction set.
Instances of this class represent operands of the MCInst class.
static MCOperand createExpr(const MCExpr *Val)
static MCOperand createReg(MCRegister Reg)
static MCOperand createImm(int64_t Val)
void setReg(MCRegister Reg)
Set the register number.
MCRegister getReg() const
Returns the register number.
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getRegister(unsigned i) const
getRegister - Return the specified register in the class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const MCRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg.
uint16_t getEncodingValue(MCRegister Reg) const
Returns the encoding for Reg.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of the sub-register with index Idx of physical register Reg.
Wrapper class representing physical registers. Should be passed by value.
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Symbolize and annotate disassembled instructions.
Represents a location in source code.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
StringRef - Represent a constant reference to a string, i.e.
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
A raw_ostream that writes to a SmallVector or SmallString.
const char *(* LLVMSymbolLookupCallback)(void *DisInfo, uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)
The type for the symbol lookup function.
int(* LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t OpSize, uint64_t InstSize, int TagType, void *TagBuf)
The type for the operand information call back function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
ArrayRef< GFXVersion > getGFXVersions()
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isGFX10(const MCSubtargetInfo &STI)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isVOPC64DPP(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
bool hasGDS(const MCSubtargetInfo &STI)
bool isGFX9Plus(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool hasVOPD(const MCSubtargetInfo &STI)
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
@ C
The default llvm calling convention, compatible with C.
@ KERNEL_CODE_PROPERTIES_OFFSET
@ GROUP_SEGMENT_FIXED_SIZE_OFFSET
@ COMPUTE_PGM_RSRC3_OFFSET
@ KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET
@ COMPUTE_PGM_RSRC1_OFFSET
@ COMPUTE_PGM_RSRC2_OFFSET
@ PRIVATE_SEGMENT_FIXED_SIZE_OFFSET
uint16_t read16(const void *P, endianness E)
This is an optimization pass for GlobalISel generic memory operations.
int popcount(T Value) noexcept
Count the number of set bits in a value.
raw_fd_ostream & outs()
This returns a reference to a raw_fd_ostream for standard output.
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Target & getTheGCNTarget()
The target for GCN GPUs.
std::vector< SymbolInfoTy > SectionSymbolsTy
unsigned M0(unsigned Val)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Description of the encoding of one expression Op.
static void RegisterMCSymbolizer(Target &T, Target::MCSymbolizerCtorTy Fn)
RegisterMCSymbolizer - Register an MCSymbolizer implementation for the given target.
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.