#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {

    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                AMDGPU::OpName Name) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
  int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
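// Worked example (illustrative): SOPP branch targets are 16-bit signed word
// offsets relative to the next instruction. With Imm = 0xFFFF (-1) at
// Addr = 0x100, Offset = (-1) * 4 + 4 + 0x100 = 0x100, a branch to itself.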
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9 and GFX10 have signed 21-bit offsets.
    Offset = SignExtend64<21>(Imm);
  }
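// Example (illustrative): on GFX12 the encoding 0xFFFFFF sign-extends through
// SignExtend64<24> to -1; on VI the same field is treated as unsigned, so no
// sign extension occurs.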
  return addOperand(Inst, DAsm->decodeBoolReg(Val));

  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));

  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
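// Illustrative expansion (derived from the macro above): DECODE_OPERAND(
// decodeSDWASrc32, decodeSDWASrc32) generates a static trampoline that casts
// the MCDisassembler back to AMDGPUDisassembler and forwards the raw encoding
// to the member decoder of the same name.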
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm));               \
  }
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                unsigned OpWidth, unsigned Imm, unsigned EncImm,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm));
}
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
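// Illustrative expansion (assumed instantiation, not shown in this excerpt):
// DECODE_OPERAND_SREG_7(SReg_32, 32) defines DecodeSReg_32RegisterClass,
// which asserts the immediate fits the 7-bit SGPR encoding and then hands it
// to decodeSrcOp with a 32-bit operand width.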
template <unsigned OpWidth>
template <unsigned OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}
template <unsigned OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}
template <unsigned OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
}
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /*Addr*/,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /*Addr*/,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;

  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
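// Worked example (illustrative): Imm = 0x205 has bit 9 set and a low byte of
// 5, so IsHi is true and RegIdx is 5; the operand decodes to the high half of
// v5 (printed as v5.h).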
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;

  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));
}
template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));
}
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));

  return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));

  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;

  if (!DAsm->isGFX90A()) {
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
                                  ? AMDGPU::OpName::data0
                                  : AMDGPU::OpName::vdata;

    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataName);

    int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);

    int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));

template <unsigned Opw>

  assert(Imm < (1 << 9) && "9-bit encoding");

  return addOperand(Inst, DAsm->decodeSrcOp(64, Imm));
#define DECODE_SDWA(DecName) \
  DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

  return addOperand(Inst, DAsm->decodeVersionImm(Imm));

#include "AMDGPUGenDisassemblerTables.inc"
template <typename InsnType>

  const auto SavedBytes = Bytes;

      decodeInstruction(Table, TmpInst, Inst, Address, this, STI);

  Comments << LocalComments;
template <typename InsnType>

  for (const uint8_t *T : {Table1, Table2}) {
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(4);
  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
                                           const MCInstrInfo &MCII) const {

    if (OpNo >= MI.getNumOperands())

    int64_t Imm = Op.getImm();

    switch (OpDesc.OperandType) {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

    Size = std::min((size_t)4, Bytes_.size());

    Bytes = Bytes_.slice(0, MaxInstBytesNum);

      Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    } else if (Bytes.size() >= 16 &&

      Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);
  decodeImmOperands(MI, *MCII);

  else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=

                          AMDGPU::OpName::src2_modifiers);
  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {

                          AMDGPU::OpName::src2_modifiers);

  int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::cpol);

    if (MI.getNumOperands() <= (unsigned)CPolPos) {

                            AMDGPU::OpName::cpol);

      MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto *TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);

    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto *SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;

      Bytes = Bytes.slice(4 * NSAWords);
  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {

    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() !=
                           MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));

                           AMDGPU::OpName::vdst_in);
    if (Bytes_[0] != ExecEncoding)

  Size = MaxInstBytesNum - Bytes.size();
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
  int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);

                         AMDGPU::OpName::sdst);
    return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));

        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));

            MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {

        BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
  int BlgpIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);

  int CbszIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);

  unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
  unsigned BLGP = MI.getOperand(BlgpIdx).getImm();

  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  int FmtAIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);

  int FmtBIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);

  unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
  unsigned FmtB = MI.getOperand(FmtBIdx).getImm();

  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  unsigned OpSelHi = 0;

static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {

  unsigned Opc = MI.getOpcode();
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    unsigned Val = MI.getOperand(OpIdx).getImm();

    } else if (J == 0) {
  const unsigned Opc = MI.getOpcode();

  constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
      OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {

    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)

    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(

          AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),

                         AMDGPU::OpName::src2_modifiers);
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src1_modifiers);

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)

  unsigned Opc = MI.getOpcode();

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);
  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);

                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);
  if (BaseOpcode->BVH) {

  bool IsAtomic = (VDstIdx != -1);

  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;
    int DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);

    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsVSample && AddrSize > 12)

    if (AddrSize > Info->VAddrDwords) {

      IsPartialNSA = true;
  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();

    DstSize = (DstSize + 1) / 2;

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)

  if (NewOpcode == -1)

  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();

    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
1349 MI.setOpcode(NewOpcode);
1351 if (NewVdata != AMDGPU::NoRegister) {
1363 assert(AddrSize <= Info->VAddrDwords);
1364 MI.erase(
MI.begin() + VAddr0Idx + AddrSize,
1365 MI.begin() + VAddr0Idx +
Info->VAddrDwords);
  unsigned Opc = MI.getOpcode();

  if (MI.getNumOperands() < DescNumOps &&

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::neg_hi);
  unsigned Opc = MI.getOpcode();

  if (MI.getNumOperands() < DescNumOps &&

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src1_modifiers);

  unsigned Opc = MI.getOpcode();

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);
  assert(HasLiteral && "Should have decoded a literal");

MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine &ErrMsg) const {

MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto &RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                               ": unknown register " + Twine(Val));
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);

         "Should only decode multiple kimm with VOPD, check VSrc operand types");
      return errOperand(Val, "More than one unique literal is illegal");

    if (Literal64 != Val)
      return errOperand(Val, "More than one unique literal is illegal");

  Literal = Literal64 = Val;
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                               Twine(Bytes.size()));
    }
    Literal = Literal64 = eatBytes<uint32_t>(Bytes);

    if (Bytes.size() < 8) {
      return errOperand(0, "cannot read literal64, inst bytes left " +
                               Twine(Bytes.size()));
    }
    Literal64 = eatBytes<uint64_t>(Bytes);
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm(
      (Imm <= INLINE_INTEGER_C_POSITIVE_MAX)
          ? (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN)
          : (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
  case 240:
    return llvm::bit_cast<uint32_t>(0.5f);
  case 241:
    return llvm::bit_cast<uint32_t>(-0.5f);
  case 242:
    return llvm::bit_cast<uint32_t>(1.0f);
  case 243:
    return llvm::bit_cast<uint32_t>(-1.0f);
  case 244:
    return llvm::bit_cast<uint32_t>(2.0f);
  case 245:
    return llvm::bit_cast<uint32_t>(-2.0f);
  case 246:
    return llvm::bit_cast<uint32_t>(4.0f);
  case 247:
    return llvm::bit_cast<uint32_t>(-4.0f);
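// Example (illustrative): the inline-constant encoding 242 is 1.0, so
// getInlineImmVal32(242) yields 0x3F800000, the IEEE-754 bit pattern of 1.0f.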
  case 240:
    return llvm::bit_cast<uint64_t>(0.5);
  case 241:
    return llvm::bit_cast<uint64_t>(-0.5);
  case 242:
    return llvm::bit_cast<uint64_t>(1.0);
  case 243:
    return llvm::bit_cast<uint64_t>(-1.0);
  case 244:
    return llvm::bit_cast<uint64_t>(2.0);
  case 245:
    return llvm::bit_cast<uint64_t>(-2.0);
  case 246:
    return llvm::bit_cast<uint64_t>(4.0);
  case 247:
    return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  using namespace AMDGPU;

    return VGPR_32RegClassID;
    return VReg_64RegClassID;
    return VReg_96RegClassID;
    return VReg_128RegClassID;
    return VReg_160RegClassID;
    return VReg_192RegClassID;
    return VReg_256RegClassID;
    return VReg_288RegClassID;
    return VReg_320RegClassID;
    return VReg_352RegClassID;
    return VReg_384RegClassID;
    return VReg_512RegClassID;
    return VReg_1024RegClassID;
  using namespace AMDGPU;

    return AGPR_32RegClassID;
    return AReg_64RegClassID;
    return AReg_96RegClassID;
    return AReg_128RegClassID;
    return AReg_160RegClassID;
    return AReg_256RegClassID;
    return AReg_288RegClassID;
    return AReg_320RegClassID;
    return AReg_352RegClassID;
    return AReg_384RegClassID;
    return AReg_512RegClassID;
    return AReg_1024RegClassID;
  using namespace AMDGPU;

    return SGPR_32RegClassID;
    return SGPR_64RegClassID;
    return SGPR_96RegClassID;
    return SGPR_128RegClassID;
    return SGPR_160RegClassID;
    return SGPR_256RegClassID;
    return SGPR_288RegClassID;
    return SGPR_320RegClassID;
    return SGPR_352RegClassID;
    return SGPR_384RegClassID;
    return SGPR_512RegClassID;
  using namespace AMDGPU;

    return TTMP_32RegClassID;
    return TTMP_64RegClassID;
    return TTMP_128RegClassID;
    return TTMP_256RegClassID;
    return TTMP_288RegClassID;
    return TTMP_320RegClassID;
    return TTMP_352RegClassID;
    return TTMP_384RegClassID;
    return TTMP_512RegClassID;
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
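// Example (illustrative): on GFX9+ a source encoding equal to
// TTMP_GFX9PLUS_MIN maps to ttmp index 0; anything outside the
// [TTmpMin, TTmpMax] window returns -1, i.e. "not a trap temp register".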
  using namespace AMDGPU::EncValues;

  bool IsAGPR = Val & 512;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(unsigned Width,
                                                 unsigned Val) const {
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  static_assert(SGPR_MIN == 0);

  if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
      (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
      Val == LITERAL_CONST)

  if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
                                               unsigned Val) const {
  int VDstXInd =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);

  Val |= ~XDstReg & 1;
  using namespace AMDGPU;

  using namespace AMDGPU;

  using namespace AMDGPU;
MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
        (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
  using namespace AMDGPU::SDWA;

         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
  auto [Version, W64, W32, MDP] = Encoding::decode(Imm);

  if (Encoding::encode(Version, W64, W32, MDP) != Imm)

  if (I == Versions.end())
  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
  if (PopCount == 1) {
    S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  } else {
    S << "bits in range ("
      << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
      << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  }
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
  } while (0)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
             << GET_FIELD(MASK) << '\n';                                       \
  } while (0)

#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  do {                                                                         \
    if (FourByteBuffer & (MASK)) {                                             \
      return createStringError(std::errc::invalid_argument,                    \
                               "kernel descriptor " DESC                       \
                               " reserved %s set" MSG,                         \
                               getBitRangeFromMask((MASK), 0).c_str());        \
    }                                                                          \
  } while (0)

#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
  using namespace amdhsa;

  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

  uint32_t NextFreeVGPR =
      (GranulatedWorkitemVGPRCount + 1) *
      AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

  if (isGFX10Plus())
    CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                            "must be zero on gfx10+");

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  if (!hasArchitectedFlatScratch())
    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

    PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);

    PRINT_DIRECTIVE(".amdhsa_ieee_mode",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);

  if (isGFX9Plus())
    PRINT_DIRECTIVE(".amdhsa_fp16_overflow",
                    COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);

    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
                                 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");

                                   COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);

                             "COMPUTE_PGM_RSRC1");
    PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);

                             "COMPUTE_PGM_RSRC1");

    PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
                    COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
  using namespace amdhsa;

  if (hasArchitectedFlatScratch())
    PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  else
    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_invalid_op",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
  using namespace amdhsa;

    KdStream << Indent << ".amdhsa_accum_offset "
             << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
             << '\n';

    PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
    if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
      PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
                      COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
    } else {
      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
    }

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx12+");

      PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
                                     COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);

      PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
                      COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10");

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

      PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
                                     COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);

                                 "COMPUTE_PGM_RSRC3",
                                 "must be zero on gfx10 or gfx11");

      PRINT_PSEUDO_DIRECTIVE_COMMENT("NAMED_BAR_CNT",
                                     COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TCP_SPLIT",
                                     COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "ENABLE_DIDT_THROTTLE",
          COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

      PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10");
  } else if (FourByteBuffer) {
    return createStringError(
        std::errc::invalid_argument,
        "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE

#undef CHECK_RESERVED_BITS_IMPL
#undef CHECK_RESERVED_BITS
#undef CHECK_RESERVED_BITS_MSG
#undef CHECK_RESERVED_BITS_DESC
#undef CHECK_RESERVED_BITS_DESC_MSG
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
                                       const char *Msg = "") {
  return createStringError(
      std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
      getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
}

static Error createReservedKDBytesError(unsigned BaseInBytes,
                                        unsigned WidthInBytes) {
  return createStringError(
      std::errc::invalid_argument,
      "kernel descriptor reserved bits in range (%u:%u) set",
      (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
}
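// Worked example (illustrative): createReservedKDBytesError(58, 2) reports
// "kernel descriptor reserved bits in range (479:464) set", since bytes
// 58..59 span bits 464..479 of the 64-byte descriptor.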
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
  } while (0)

  switch (Cursor.tell()) {
  case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
             << '\n';
    return true;

  case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';
    return true;

  case amdhsa::KERNARG_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size "
             << FourByteBuffer << '\n';
    return true;
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)

    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0)
  case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);

  case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);

  case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);

    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
    if (!hasArchitectedFlatScratch())
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

      PRINT_DIRECTIVE(".amdhsa_uses_cu_stores",
                      KERNEL_CODE_PROPERTY_USES_CU_STORES);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
      return createReservedKDBitsError(
          KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,

      PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

    PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
                    KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
                      KERNARG_PRELOAD_SPEC_LENGTH);
    }

    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
                      KERNARG_PRELOAD_SPEC_OFFSET);
    }
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)

#undef PRINT_DIRECTIVE
  if (Bytes.size() != 64 || KdAddress % 64 != 0)
    return createStringError(std::errc::invalid_argument,
                             "kernel descriptor must be 64-byte aligned");

    EnableWavefrontSize32 =
        AMDHSA_BITS_GET(KernelCodeProperties,
                        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
  KdStream << ".amdhsa_kernel " << KdName << '\n';

  while (C && C.tell() < Bytes.size()) {

  KdStream << ".end_amdhsa_kernel\n";
2716 "code object v2 is not supported");
2729const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(
StringRef Id,
2735 if (!
Sym->isVariable()) {
2739 bool Valid =
Sym->getVariableValue()->evaluateAsAbsolute(Res);
2740 if (!Valid || Res != Val)
2768 if (Result != Symbols->end()) {
2775 ReferencedAddresses.push_back(
static_cast<uint64_t>(
Value));
2794 std::unique_ptr<MCRelocationInfo> &&RelInfo) {