#define DEBUG_TYPE "amdgpu-disassembler"
#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)
AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()),
      TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // Disassembly is only supported for the GCN3 (VI) encoding and gfx10+.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    reportFatalUsageError("disassembly not yet supported for subtarget");

  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
}
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                AMDGPU::OpName Name) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  // Branches take a simm16 in units of dwords, relative to the next
  // instruction.
  int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Inst, Val));
}

static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Inst, Val));
}

static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
}
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                        \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,           \
                                        uint64_t /*Addr*/,                    \
                                        const MCDisassembler *Decoder) {      \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);             \
    return addOperand(Inst, DAsm->DecoderName(Imm));                          \
  }
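// DECODE_OPERAND stamps out a thin static wrapper that the TableGen'd decoder
// tables can call through a function pointer. Hand-written expansion for
// illustration:
//   DECODE_OPERAND(decodeSDWASrc32, decodeSDWASrc32)
// produces:
//   static DecodeStatus decodeSDWASrc32(MCInst &Inst, unsigned Imm,
//                                       uint64_t /*Addr*/,
//                                       const MCDisassembler *Decoder) {
//     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
//     return addOperand(Inst, DAsm->decodeSDWASrc32(Imm));
//   }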
#define DECODE_OPERAND_REG_8(RegClass)                                        \
  static DecodeStatus Decode##RegClass##RegisterClass(                        \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                          \
      const MCDisassembler *Decoder) {                                        \
    assert(Imm < (1 << 8) && "8-bit encoding");                               \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);             \
    return addOperand(                                                        \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));     \
  }
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                          \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,     \
                           const MCDisassembler *Decoder) {                   \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                 \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);             \
    return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));        \
  }
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                unsigned OpWidth, unsigned Imm,
                                unsigned EncImm,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));
}
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                              \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                              \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
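// Hand expansion for illustration: DECODE_OPERAND_SREG_7(SReg_32, 32) defines
// DecodeSReg_32RegisterClass, which asserts Imm < (1 << 7) and then forwards
// to decodeSrcOp(Inst, /*OpWidth=*/32, Imm).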
template <unsigned OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | 256, Decoder);
}

template <unsigned OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

template <unsigned OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}

template <unsigned OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
}

template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /*Addr*/,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /*Addr*/,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
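// True16 VGPR operand layout, as used by the two decoders above:
//   Imm{7:0} (Imm{6:0} for the Lo128 form) - VGPR index
//   Imm{9}   (Imm{7}   for the Lo128 form) - selects the hi 16-bit half
// e.g. Imm = 0x205 with the 10-bit form decodes to v5.h, Imm = 0x005 to v5.l.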
template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & (1 << 8)) { // Imm{8} set means this is a VGPR.
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
}
template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & (1 << 8)) { // Imm{8} set means this is a VGPR.
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
}
static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
}
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
                                 const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, Opw, Imm | 256));
}

template <unsigned Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, 64, Imm));
}
#define DECODE_SDWA(DecName) \
  DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
                                     uint64_t /*Addr*/,
                                     const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
}
#include "AMDGPUGenDisassemblerTables.inc"
template <typename T> constexpr uint32_t InsnBitWidth = 0;
template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
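// InsnBitWidth<InsnType> lets tryDecodeInst select the decoder tables that
// match the width of the raw encoding it was handed, e.g. an instruction word
// held in a std::bitset<96> selects the 96-bit tables.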
template <typename InsnType>
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
                                               InsnType Inst, uint64_t Address,
                                               raw_ostream &Comments) const {
  MCInst TmpInst;
  HasLiteral = false;
  const auto SavedBytes = Bytes;

  SmallString<64> LocalComments;
  raw_svector_ostream LocalCommentStream(LocalComments);
  CommentStream = &LocalCommentStream;

  DecodeStatus Res =
      decodeInstruction(Table, TmpInst, Inst, Address, this, STI);

  CommentStream = nullptr;

  if (Res != MCDisassembler::Fail) {
    MI = TmpInst;
    Comments << LocalComments;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}
template <typename InsnType>
DecodeStatus
AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
                                  MCInst &MI, InsnType Inst, uint64_t Address,
                                  raw_ostream &Comments) const {
  for (const uint8_t *T : {Table1, Table2}) {
    if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
      return Res;
  }
  return MCDisassembler::Fail;
}
template <typename T> static T eatBytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}
static std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= 12);
  std::bitset<96> Lo(
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<96> Hi(
      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(4);
  return (Hi << 64) | Lo;
}

static std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= 16);
  std::bitset<128> Lo(
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<128> Hi(
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  return (Hi << 64) | Lo;
}
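// Both readers assemble the value little-endian, low quadword first. Worked
// example for eat16Bytes: input bytes 00 01 02 ... 0f yield
//   Lo = 0x0706050403020100, Hi = 0x0f0e0d0c0b0a0908,
// and (Hi << 64) | Lo places byte 0 in bits 7:0 of the 128-bit result.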
void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
                                           const MCInstrInfo &MCII) const {
  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
    if (OpNo >= MI.getNumOperands())
      continue;

    MCOperand &Op = MI.getOperand(OpNo);
    if (!Op.isImm())
      continue;

    int64_t Imm = Op.getImm();

    switch (OpDesc.OperandType) {
DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  unsigned MaxInstBytesNum =
      std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  // In case the opcode is not recognized, assume a size of 4 bytes (unless
  // there are fewer bytes left).
  Size = std::min((size_t)4, Bytes_.size());

  Bytes = Bytes_.slice(0, MaxInstBytesNum);
  if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
    Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);
    Bytes = Bytes_.slice(0, MaxInstBytesNum);
  } else if (Bytes.size() >= 16 &&
             STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
    Bytes = Bytes_.slice(0, MaxInstBytesNum);
  }

  if (Bytes.size() >= 8) {
    const uint64_t QW = eatBytes<uint64_t>(Bytes);

    if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
        tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
      break;

    if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
        tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
      break;

    if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
        tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
      break;

    // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
    // v_mad_mixhi_f16 encodings for FMA variants; try the dedicated table
    // first so the correct name is printed.
    if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
        tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
      break;

    if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
        tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
      break;

    if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
        tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
      break;
  }

  Bytes = Bytes_.slice(0, MaxInstBytesNum);
  if (Bytes.size() >= 4) {
    const uint32_t DW = eatBytes<uint32_t>(Bytes);

    if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
        tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
      break;

    if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
        tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
      break;

    if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
        tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
      break;
  }
  decodeImmOperands(MI, *MCII);
  else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
           -1)
    convertDPP8Inst(MI);

    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }
  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }
  if ((MCII->get(MI.getOpcode()).TSFlags &
       (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto *TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }
    int OffsetIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
    if (OffsetIdx != -1) {
      uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
      if (SignedOffset < 0)
  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto *SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }
  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID = MCII->getOpRegClassID(
            Desc.operands()[VAddrIdx], HwModeRegClass);
        MI.insert(MI.begin() + VAddrIdx,
                  createRegOperand(VAddrRCID, Bytes[i]));
      }
      Bytes = Bytes.slice(4 * NSAWords);
    }
  }
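  // NSA encodes each extra VGPR address in one byte, padded to a dword
  // boundary: e.g. NSAArgs = 5 gives NSAWords = (5 + 3) / 4 = 2, so two
  // extra dwords are consumed even though only five bytes are meaningful.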
  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
    convertMIMGInst(MI);
    int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                AMDGPU::OpName::vdst_in);
    if (VDstIn_Idx != -1) {
      int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                                                                MCOI::TIED_TO);
      if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                         !MI.getOperand(VDstIn_Idx).isReg() ||
                         MI.getOperand(VDstIn_Idx).getReg() !=
                             MI.getOperand(Tied).getReg())) {
        if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
          MI.erase(&MI.getOperand(VDstIn_Idx));
        insertNamedMCOperand(
            MI, MCOperand::createReg(MI.getOperand(Tied).getReg()),
            AMDGPU::OpName::vdst_in);
      }
    }
      MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
    auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
    if (Bytes_[0] != ExecEncoding)
      return MCDisassembler::Fail;
  }

  Size = MaxInstBytesNum - Bytes.size();
  return MCDisassembler::Success;
}
void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
  }
}

void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
}
void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    }
  }
}
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI,
                                        MCOperand &MO, uint8_t NumRegs) {
  switch (NumRegs) {
  case 4:
    return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
  case 6:
    return MO.setReg(
        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
  case 8:
    if (MCRegister NewReg = MRI.getSubReg(
            MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7))
      return MO.setReg(NewReg);
    return;
  case 12: {
    // No covering 384-bit subregister index exists; rebuild from sub0 via the
    // matching super-register instead.
    MCRegister BaseReg = MRI.getSubReg(MO.getReg(), AMDGPU::sub0);
    return MO.setReg(MRI.getMatchingSuperReg(
        BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID)));
  }
  default:
    llvm_unreachable("Unexpected size of MFMA/WMMA operand");
  }
}
void AMDGPUDisassembler::convertMAIInst(MCInst &MI) const {
  int BlgpIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
  if (BlgpIdx == -1)
    return;

  int CbszIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);

  unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
  unsigned BLGP = MI.getOperand(BlgpIdx).getImm();

  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
                              AdjustedRegClassOpcode->NumRegsSrcA);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
                              AdjustedRegClassOpcode->NumRegsSrcB);
}

void AMDGPUDisassembler::convertWMMAInst(MCInst &MI) const {
  int FmtAIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
  if (FmtAIdx == -1)
    return;

  int FmtBIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);

  unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
  unsigned FmtB = MI.getOperand(FmtBIdx).getImm();

  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
                              AdjustedRegClassOpcode->NumRegsSrcA);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
                              AdjustedRegClassOpcode->NumRegsSrcB);
}
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }
  return Modifiers;
}
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
      OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;

    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg() || !ConversionRC.contains(Op.getReg()))
      continue;

    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      unsigned RegIdx = OpEnc & 0xFF;
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}
// An instruction is MAC or FMAC DPP when its 'old' operand is tied to the
// destination.
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == DST_IDX) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    return true;
  }

  return false;
}

// Create dummy old operand and insert dummy unused src2_modifiers.
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}
void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    convertTrue16OpSel(MI);
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  } else {
    // Insert dummy unused src modifiers.
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src0_modifiers);

    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src1_modifiers);
  }
}
void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}
static unsigned CheckVGPROverflow(unsigned Reg, const MCRegisterClass &RC,
                                  const MCRegisterInfo &MRI) {
  unsigned NumRegs = RC.getSizeInBits() / 32;
  MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
  if (!Sub0)
    Sub0 = Reg;

  MCRegister BaseReg;
  if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::VGPR0;
  else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::AGPR0;

  assert(BaseReg && "Only vector registers expected");

  return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : AMDGPU::NoRegister;
}
void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);

  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);
  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions.
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for this
          // combination of base opcode and dimension.
          return;
        }
        IsPartialNSA = true;
      }
    }
  }
  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding,
                                        DstSize, AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the register to the correct number of enabled channels.
  MCRegister NewVdata;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->getOpRegClassID(
        MCII->get(NewOpcode).operands()[VDataIdx], HwModeRegClass);

    // Get first subregister of VData.
    MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
    MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

    auto &NewRC = MRI.getRegClass(DataRCID);
    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
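    // DstSize bookkeeping example: dmask = 0b1011 enables three channels, so
    // DstSize = popcount(0b1011) = 3; packed D16 halves that to
    // (3 + 1) / 2 = 2 dwords, and TFE appends one more dword for the status
    // return.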
  // If not using NSA on GFX10+, widen vaddr0 address register to the correct
  // size. If using partial NSA on GFX11+, widen the last address register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  MCRegister NewVAddrSA;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->getOpRegClassID(
        MCII->get(NewOpcode).operands()[VAddrSAIdx], HwModeRegClass);
    auto &NewRC = MRI.getRegClass(AddrRCID);
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);
}
void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
}

void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}
  assert(HasLiteral && "Should have decoded a literal");
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine &ErrMsg) const {
  if (CommentStream)
    *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto &RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                               ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102.
  // Accept as much as possible here and let the assembler sort it out.
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
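  // VGPR_16 registers interleave lo/hi halves: register index N maps to entry
  // N * 2 (lo half) or N * 2 + 1 (hi half) in the class, so v5.h is entry 11.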
1536 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1538 return errOperand(Val,
"More than one unique literal is illegal");
1548 if (Literal64 != Val)
1549 return errOperand(Val,
"More than one unique literal is illegal");
1552 Literal = Literal64 = Val;
1554 bool UseLit64 =
Hi_32(Literal64) == 0;
MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
                                                    const MCOperandInfo &OpDesc,
                                                    bool ExtendFP64) const {
  // For now all literal constants are supposed to be unsigned integer.
  // ToDo: deal with signed/unsigned 64-bit integer constants.
  // ToDo: deal with float/double constants.
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                               Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = Literal64 = eatBytes<uint32_t>(Bytes);
    if (ExtendFP64)
      Literal64 <<= 32;
  }

  int64_t Val = ExtendFP64 ? Literal64 : Literal;

  bool CanUse64BitLiterals =
      STI.hasFeature(AMDGPU::Feature64BitLiterals) &&
      !(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));

  bool UseLit64 = false;
  if (CanUse64BitLiterals) {
    if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 ||
        OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
        OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64)
      UseLit64 = Hi_32(Literal64) == 0;
  }
MCOperand
AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const {
  assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));

  if (!HasLiteral) {
    if (Bytes.size() < 8) {
      return errOperand(0, "cannot read literal64, inst bytes left " +
                               Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal64 = eatBytes<uint64_t>(Bytes);
  }

  bool UseLit64 = false;

    UseLit64 = Hi_32(Literal64) == 0;
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm(
      (Imm <= INLINE_INTEGER_C_POSITIVE_MAX)
          ? (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN)
          : (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
}
    return 0x3fc45f306dc9c882; // 1 / (2 * PI)
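// 0x3fc45f306dc9c882 is the IEEE-754 double bit pattern AMDGPU defines for
// the 1/(2*pi) inline constant (encoding 248); encodings 240-247 similarly
// map to the +-0.5, +-1.0, +-2.0 and +-4.0 bit patterns for each width.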
unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return VGPR_32RegClassID;
  case 64:
    return VReg_64RegClassID;
  case 96:
    return VReg_96RegClassID;
  case 128:
    return VReg_128RegClassID;
  case 160:
    return VReg_160RegClassID;
  case 192:
    return VReg_192RegClassID;
  case 256:
    return VReg_256RegClassID;
  case 288:
    return VReg_288RegClassID;
  case 320:
    return VReg_320RegClassID;
  case 352:
    return VReg_352RegClassID;
  case 384:
    return VReg_384RegClassID;
  case 512:
    return VReg_512RegClassID;
  case 1024:
    return VReg_1024RegClassID;
  default:
    llvm_unreachable("Invalid register width!");
  }
}

unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return AGPR_32RegClassID;
  case 64:
    return AReg_64RegClassID;
  case 96:
    return AReg_96RegClassID;
  case 128:
    return AReg_128RegClassID;
  case 160:
    return AReg_160RegClassID;
  case 256:
    return AReg_256RegClassID;
  case 288:
    return AReg_288RegClassID;
  case 320:
    return AReg_320RegClassID;
  case 352:
    return AReg_352RegClassID;
  case 384:
    return AReg_384RegClassID;
  case 512:
    return AReg_512RegClassID;
  case 1024:
    return AReg_1024RegClassID;
  default:
    llvm_unreachable("Invalid register width!");
  }
}

unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return SGPR_32RegClassID;
  case 64:
    return SGPR_64RegClassID;
  case 96:
    return SGPR_96RegClassID;
  case 128:
    return SGPR_128RegClassID;
  case 160:
    return SGPR_160RegClassID;
  case 256:
    return SGPR_256RegClassID;
  case 288:
    return SGPR_288RegClassID;
  case 320:
    return SGPR_320RegClassID;
  case 352:
    return SGPR_352RegClassID;
  case 384:
    return SGPR_384RegClassID;
  case 512:
    return SGPR_512RegClassID;
  default:
    llvm_unreachable("Invalid register width!");
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return TTMP_32RegClassID;
  case 64:
    return TTMP_64RegClassID;
  case 128:
    return TTMP_128RegClassID;
  case 256:
    return TTMP_256RegClassID;
  case 288:
    return TTMP_288RegClassID;
  case 320:
    return TTMP_320RegClassID;
  case 352:
    return TTMP_352RegClassID;
  case 384:
    return TTMP_384RegClassID;
  case 512:
    return TTMP_512RegClassID;
  default:
    llvm_unreachable("Invalid register width!");
  }
}
int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
}
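// Example: on gfx9+ the trap temp SGPRs occupy the encoding window
// [TTMP_GFX9PLUS_MIN, TTMP_GFX9PLUS_MAX]; a Val equal to TTMP_GFX9PLUS_MIN
// returns ttmp index 0, and anything outside the window returns -1.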
MCOperand AMDGPUDisassembler::decodeSrcOp(const MCInst &Inst, unsigned Width,
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 1024); // enum10

  bool IsAGPR = Val & 512;
  Val &= 511;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                   : getVgprClassId(Width),
                            Val - VGPR_MIN);
  }
  return decodeNonVGPRSrcOp(Inst, Width, Val & 0xFF);
}
MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const MCInst &Inst,
                                                 unsigned Width,
                                                 unsigned Val) const {
  // Cases when Val{8} is 1 (vgpr, agpr or true16 vgpr) should have been
  // decoded earlier.
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes a compilation warning.
    static_assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0)
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);

  if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
      (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
      Val == LITERAL_CONST)
    return MCOperand::createImm(Val);

  if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
    return decodeLiteral64Constant(Inst);
  }
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
                                               unsigned Val) const {
  int VDstXInd =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
  assert(VDstXInd != -1);
  assert(Inst.getOperand(VDstXInd).isReg());
  unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
  Val |= ~XDstReg & 1;
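  // VOPD encodes vdstY in 7 bits: hardware requires the two destinations to
  // land in opposite parity banks, so the low bit of vdstY is implied as the
  // complement of vdstX's low encoding bit and is reconstructed above.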
MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    // The cast to int avoids a "comparison is always true" warning.
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
        (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
      return MCOperand::createImm(SVal);
  if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
    return createRegOperand(getVgprClassId(Width), Val);
MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
MCOperand AMDGPUDisassembler::decodeBoolReg(const MCInst &Inst,
                                            unsigned Val) const {
  return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
             ? decodeSrcOp(Inst, 32, Val)
             : decodeSrcOp(Inst, 64, Val);
}

MCOperand AMDGPUDisassembler::decodeSplitBarrier(const MCInst &Inst,
                                                 unsigned Val) const {
  auto [Version, W64, W32, MDP] = Encoding::decode(Imm);

  if (Encoding::encode(Version, W64, W32, MDP) != Imm)
    return MCOperand::createImm(Imm);

  if (I == Versions.end())
bool AMDGPUDisassembler::isVI() const {
  return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}

bool AMDGPUDisassembler::isGFX90A() const {
  return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}

bool AMDGPUDisassembler::isGFX11() const {
  return STI.hasFeature(AMDGPU::FeatureGFX11);
}

bool AMDGPUDisassembler::isGFX12() const {
  return STI.hasFeature(AMDGPU::FeatureGFX12);
}

bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
}
  if (PopCount == 1) {
    S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  } else {
    S << "bits in range ("
      << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
      << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  }
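// Example outputs (BaseBytes = 0): Mask = 0x00000400 yields "bit (10)";
// Mask = 0x000000f0 yields "bits in range (7:4)".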
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                      \
  do {                                                                        \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';           \
  } while (0)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                       \
  do {                                                                        \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "      \
             << GET_FIELD(MASK) << '\n';                                      \
  } while (0)

#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                             \
  do {                                                                        \
    if (FourByteBuffer & (MASK)) {                                            \
      return createStringError(std::errc::invalid_argument,                   \
                               "kernel descriptor " DESC                      \
                               " reserved %s set" MSG,                        \
                               getBitRangeFromMask((MASK), 0).c_str());       \
    }                                                                         \
  } while (0)

#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                    \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                  \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                         \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
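// Hand expansion for illustration: CHECK_RESERVED_BITS_MSG(SOME_MASK, "msg")
// becomes CHECK_RESERVED_BITS_IMPL(SOME_MASK, "SOME_MASK", ", msg"), which
// returns
//   createStringError(std::errc::invalid_argument,
//                     "kernel descriptor SOME_MASK reserved %s set, msg", ...)
// as soon as any bit of FourByteBuffer & SOME_MASK is set.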
  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

  uint32_t NextFreeVGPR =
      (GranulatedWorkitemVGPRCount + 1) *
      AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

  if (isGFX10Plus())
    CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                            "must be zero on gfx10+");

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  if (!hasArchitectedFlatScratch())
    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK);
  assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
           << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

  PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
                  COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
  PRINT_DIRECTIVE(".amdhsa_ieee_mode",
                  COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);

  if (isGFX9Plus())
    PRINT_DIRECTIVE(".amdhsa_fp16_overflow",
                    COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);

  // CHECK_RESERVED_BITS_DESC_MSG(<reserved mask>, "COMPUTE_PGM_RSRC1",
  //                              "must be zero pre-gfx9");

  PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
                                 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);

  // CHECK_RESERVED_BITS_DESC(<reserved mask>, "COMPUTE_PGM_RSRC1");

  if (isGFX10Plus()) {
    PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
  }

  // CHECK_RESERVED_BITS_DESC(<reserved mask>, "COMPUTE_PGM_RSRC1");

  if (isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
                    COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
  if (hasArchitectedFlatScratch())
    PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  else
    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
  if (isGFX90A()) {
    KdStream << Indent << ".amdhsa_accum_offset "
             << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
             << '\n';

    PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);

    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
                                 "COMPUTE_PGM_RSRC3",
                                 "must be zero on gfx90a");
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
                                 "COMPUTE_PGM_RSRC3",
                                 "must be zero on gfx90a");
  }
  if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
    PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
                    COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
  } else {
    PRINT_PSEUDO_DIRECTIVE_COMMENT(
        "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
  }
  // CHECK_RESERVED_BITS_DESC_MSG(<reserved mask>, "COMPUTE_PGM_RSRC3",
  //                              "must be zero on gfx12+");

  PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
                                 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
  PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
                                 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
  PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
                                 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);

  PRINT_PSEUDO_DIRECTIVE_COMMENT(
      "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
  // CHECK_RESERVED_BITS_DESC_MSG(<reserved mask>, "COMPUTE_PGM_RSRC3",
  //                              "must be zero on gfx10");

  // CHECK_RESERVED_BITS_DESC_MSG(<reserved mask>, "COMPUTE_PGM_RSRC3",
  //                              "must be zero on gfx10+");

  PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
                                 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
  // CHECK_RESERVED_BITS_DESC_MSG(<reserved mask>, "COMPUTE_PGM_RSRC3",
  //                              "must be zero on gfx10 or gfx11");

  PRINT_PSEUDO_DIRECTIVE_COMMENT("NAMED_BAR_CNT",
                                 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
  PRINT_PSEUDO_DIRECTIVE_COMMENT(
      "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
  PRINT_PSEUDO_DIRECTIVE_COMMENT("TCP_SPLIT",
                                 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
  PRINT_PSEUDO_DIRECTIVE_COMMENT(
      "ENABLE_DIDT_THROTTLE",
      COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
  // CHECK_RESERVED_BITS_DESC_MSG(<reserved mask>, "COMPUTE_PGM_RSRC3",
  //                              "must be zero on gfx10+");

  PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
                                 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
  // CHECK_RESERVED_BITS_DESC_MSG(<reserved mask>, "COMPUTE_PGM_RSRC3",
  //                              "must be zero on gfx10");
  } else if (FourByteBuffer) {
    return createStringError(
        std::errc::invalid_argument,
        "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
  }
  return true;
}
#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE
#undef GET_FIELD
#undef CHECK_RESERVED_BITS_IMPL
#undef CHECK_RESERVED_BITS
#undef CHECK_RESERVED_BITS_MSG
#undef CHECK_RESERVED_BITS_DESC
#undef CHECK_RESERVED_BITS_DESC_MSG
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
                                       const char *Msg = "") {
  return createStringError(
      std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
      getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
}

static Error createReservedKDBytesError(unsigned BaseInBytes,
                                        unsigned WidthInBytes) {
  // Create an error comment in the same format as the "Kernel Descriptor"
  // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor
  return createStringError(
      std::errc::invalid_argument,
      "kernel descriptor reserved bits in range (%u:%u) set",
      (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
}
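// Example: createReservedKDBytesError(16, 4), for a 4-byte reserved block at
// byte offset 16, produces
//   "kernel descriptor reserved bits in range (159:128) set"
// matching the bit numbering of the kernel descriptor table in the docs.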
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                      \
  do {                                                                        \
    KdStream << Indent << DIRECTIVE " "                                       \
             << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';           \
  } while (0)
  assert(Bytes.size() == 64);
  DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);

  switch (Cursor.tell()) {
  case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
             << '\n';
    return true;

  case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';
    return true;

  case amdhsa::KERNARG_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size "
             << FourByteBuffer << '\n';
    return true;
  case amdhsa::RESERVED0_OFFSET:
    // 4 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)
        return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
    }
    return true;

  case amdhsa::RESERVED1_OFFSET:
    // 20 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0)
        return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
    }
    return true;
  case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);

  case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);

  case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
  case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);

    if (!hasArchitectedFlatScratch())
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
    if (!hasArchitectedFlatScratch())
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
      return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
                                       amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);

    // Reserved for GFX9.
    if (isGFX9() &&
        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
      return createReservedKDBitsError(
          KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
          amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
    }
    if (isGFX10Plus()) {
      PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
    }

    PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
                    KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
      return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
                                       amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
    }

    return true;
  case amdhsa::KERNARG_PRELOAD_OFFSET:
    TwoByteBuffer = DE.getU16(Cursor);
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
                      KERNARG_PRELOAD_SPEC_LENGTH);
    }

    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
                      KERNARG_PRELOAD_SPEC_OFFSET);
    }
    return true;

  case amdhsa::RESERVED3_OFFSET:
    // 4 bytes from here are reserved, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)
        return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
    }
    return true;

  default:
    llvm_unreachable("Do not disassemble the kernel descriptor.");
  }
#undef PRINT_DIRECTIVE
Expected<bool>
AMDGPUDisassembler::decodeKernelDescriptor(StringRef KdName,
                                           ArrayRef<uint8_t> Bytes,
                                           uint64_t KdAddress) const {
  // CP microcode requires the kernel descriptor to be 64-byte aligned.
  if (Bytes.size() != 64 || KdAddress % 64 != 0)
    return createStringError(std::errc::invalid_argument,
                             "kernel descriptor must be 64-byte aligned");

  // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
  // requires us to know the setting of .amdhsa_wavefront_size32 in order to
  // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
  // order.
  EnableWavefrontSize32 =
      AMDHSA_BITS_GET(read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
                             llvm::endianness::little),
                      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

  std::string Kd;
  raw_string_ostream KdStream(Kd);
  KdStream << ".amdhsa_kernel " << KdName << '\n';

  DataExtractor::Cursor C(0);
  while (C && C.tell() < Bytes.size()) {
    Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
    if (!Res)
      return Res;
  }
  KdStream << ".end_amdhsa_kernel\n";
  return true;
}
2770 "code object v2 is not supported");
const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
                                                           int64_t Val) {
  MCContext &Ctx = getContext();
  MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
  if (!Sym->isVariable()) {
    Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
  } else {
    int64_t Res = ~Val;
    bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
    if (!Valid || Res != Val)
      Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
  }
  return MCSymbolRefExpr::create(Sym, Ctx);
}
  const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
  if (Result != Symbols->end()) {
    auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
    const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
    Inst.addOperand(MCOperand::createExpr(Add));
    return true;
  }

  // Add to the list of referenced addresses so the caller can synthesize
  // a label.
  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
static MCSymbolizer *
createAMDGPUSymbolizer(const Triple & /*TT*/, LLVMOpInfoCallback /*GetOpInfo*/,
                       LLVMSymbolLookupCallback /*SymbolLookUp*/,
                       void *DisInfo, MCContext *Ctx,
                       std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}