//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/DisassemblerTypes.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoder.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"

using namespace llvm;
using namespace llvm::MCD;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

static int64_t getInlineImmValF16(unsigned Imm);
static int64_t getInlineImmValBF16(unsigned Imm);
static int64_t getInlineImmVal32(unsigned Imm);
static int64_t getInlineImmVal64(unsigned Imm);

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    reportFatalUsageError("disassembly not yet supported for subtarget");

  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
}

void AMDGPUDisassembler::setABIVersion(unsigned Version) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
}

static DecodeStatus addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                AMDGPU::OpName Name) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
  if (OpIdx != -1) {
    auto *I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  // Our branches take a simm16.
  int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
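
// Worked example (illustrative; values assumed, not from the original
// source): a branch encoded with Imm = 0xFFFC at Addr = 0x100 gives
// SignExtend64<16>(0xFFFC) = -4 and
//   Offset = -4 * 4 + 4 + 0x100 = 0xF4,
// i.e. the immediate counts dwords relative to the end of the 4-byte
// branch instruction.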

static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}
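
// Worked example (illustrative): Imm = 0x1FFFFF decodes to -1 under the
// GFX9+ 21-bit signed form, to 0xFFFFF under the VI 20-bit unsigned form,
// and stays well inside the 24-bit signed range used on GFX12+.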

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
}

static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
}

static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }

// Decoder for registers, decode directly using RegClassID. Imm(8-bit) is
// number of register. Used by VGPR only and AGPR only operands.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }

#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm));               \
  }

static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                unsigned OpWidth, unsigned Imm, unsigned EncImm,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm));
}

// Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to
// get register class. Used by SGPR only operands.
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)

// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
// Imm{9} is acc (agpr or vgpr), Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR or VGPR only register operands).
template <unsigned OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
                     Decoder);
}

// Decoder for Src(9-bit encoding) registers only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

// Decoder for Src(9-bit encoding) AGPR; the register number is encoded in 9
// bits. Set Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp,
// registers only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}

// Decoder for 'enum10' from decodeSrcOp: Imm{0-8} is the 9-bit Src encoding,
// Imm{9} is acc; registers only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
}

// Decoder for RegisterOperands using 9-bit Src encoding. The operand can be a
// register from RegClass or an immediate. Registers that don't belong to
// RegClass will still be decoded, and InstPrinter will report a warning. An
// immediate will be decoded into a constant matching the OperandType
// (important for floating point types).
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /* Addr */,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
// and decode using 'enum10' from decodeSrcOp.
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /* Addr */,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}
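
// Encoding sketch for the helpers above (illustrative recap, not original
// source text): in the 10-bit 'enum10' form, Imm{7-0} is the register
// number, Imm{8} (IS_VGPR) selects the VGPR range and Imm{9} selects AGPR.
// E.g. decodeSrcA9 with Imm = 0x105 ORs in 512, so decodeSrcOp sees an
// AGPR encoding, while decodeSrcReg9 passes Imm through unchanged.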

// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_192)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_320)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

DECODE_OPERAND_SREG_7(SReg_32, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
DECODE_OPERAND_SREG_7(SReg_96, 96)
DECODE_OPERAND_SREG_7(SReg_128, 128)
DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
DECODE_OPERAND_SREG_7(SReg_256, 256)
DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
DECODE_OPERAND_SREG_7(SReg_512, 512)

DECODE_OPERAND_SREG_8(SReg_64, 64)

DECODE_OPERAND_REG_8(AGPR_32)
DECODE_OPERAND_REG_8(AReg_64)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)

static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));
}

template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));
}

static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
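
// Example (illustrative): a 10-bit true16 operand Imm = 0x30B has IS_VGPR
// (bit 8) and the hi bit (bit 9) set with RegIdx = 0x0B, so it decodes to
// VGPR_16 index 11 * 2 + 1 = 23, i.e. the high half v11.h.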

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
}

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}

template <unsigned Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(64, Imm));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
                                     uint64_t /* Addr */,
                                     const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
}

#include "AMDGPUGenDisassemblerTables.inc"

namespace {
// Define bitwidths for various types used to instantiate the decoder.
template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
} // namespace

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename InsnType>
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
                                               InsnType Inst, uint64_t Address,
                                               raw_ostream &Comments) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false;
  const auto SavedBytes = Bytes;

  SmallString<64> LocalComments;
  raw_svector_ostream LocalCommentStream(LocalComments);
  CommentStream = &LocalCommentStream;

  DecodeStatus Res =
      decodeInstruction(Table, TmpInst, Inst, Address, this, STI);

  CommentStream = nullptr;

  if (Res != MCDisassembler::Fail) {
    MI = TmpInst;
    Comments << LocalComments;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}

template <typename InsnType>
DecodeStatus
AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
                                  MCInst &MI, InsnType Inst, uint64_t Address,
                                  raw_ostream &Comments) const {
  for (const uint8_t *T : {Table1, Table2}) {
    if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
      return Res;
  }
  return MCDisassembler::Fail;
}

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  using namespace llvm::support::endian;
  assert(Bytes.size() >= 12);
  std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(4);
  return (Hi << 64) | Lo;
}

static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
  using namespace llvm::support::endian;
  assert(Bytes.size() >= 16);
  std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  return (Hi << 64) | Lo;
}
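
// Example (illustrative): if the next 16 bytes little-endian-decode to
// Lo = 0x1122334455667788 and Hi = 0x99AABBCCDDEEFF00, eat16Bytes()
// returns the 128-bit value (Hi << 64) | Lo and advances Bytes by 16.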

void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
                                           const MCInstrInfo &MCII) const {
  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
    if (OpNo >= MI.getNumOperands())
      continue;

    // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
    // defined to take VGPR_32, but in reality allowing inline constants.
    bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
                 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
    if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
      continue;

    MCOperand &Op = MI.getOperand(OpNo);
    if (!Op.isImm())
      continue;
    int64_t Imm = Op.getImm();
    if (AMDGPU::EncValues::INLINE_INTEGER_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_INTEGER_C_MAX) {
      Op = decodeIntImmed(Imm);
      continue;
    }

    if (Imm == AMDGPU::EncValues::LITERAL_CONST) {
      Op = decodeLiteralConstant(OpDesc.OperandType ==
                                 AMDGPU::OPERAND_REG_IMM_FP64);
      continue;
    }

    if (AMDGPU::EncValues::INLINE_FLOATING_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX) {
      switch (OpDesc.OperandType) {
      case AMDGPU::OPERAND_REG_IMM_BF16:
      case AMDGPU::OPERAND_REG_IMM_V2BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
        Imm = getInlineImmValBF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_FP16:
      case AMDGPU::OPERAND_REG_IMM_V2FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
        Imm = getInlineImmValF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_FP64:
      case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
        Imm = getInlineImmVal64(Imm);
        break;
      default:
        Imm = getInlineImmVal32(Imm);
      }
      Op.setImm(Imm);
    }
  }
}
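
// Worked example (illustrative): the inline-constant encoding 242 means
// +1.0, and the switch above materializes it per operand type: 0x3C00 for
// f16, 0x3F80 for bf16, 0x3FF0000000000000 for f64, and 0x3F800000 (via
// getInlineImmVal32) in the default 32-bit case.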

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
  // there are fewer bytes left). This will be overridden on success.
  Size = std::min((size_t)4, Bytes_.size());

  do {
    // ToDo: it would be better to switch on the encoding length using some
    // bit predicate, but that is not known yet, so try everything we can.

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (isGFX1250() && Bytes.size() >= 16) {
      std::bitset<128> DecW = eat16Bytes(Bytes);
      if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
        break;
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    if (isGFX11Plus() && Bytes.size() >= 12) {
      std::bitset<96> DecW = eat12Bytes(Bytes);

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
        // Return 8 bytes for a potential literal.
        Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);

        if (isGFX1250() &&
            tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
          break;
      }

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);

    } else if (Bytes.size() >= 16 &&
               STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
      std::bitset<128> DecW = eat16Bytes(Bytes);
      if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
          tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
          tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
        break;

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
          tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
          tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
        break;

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
                        QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    // Try to decode a 32-bit instruction
    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
        break;

      if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
          tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                        Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
                        DW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
                        Address, CS))
        break;
    }

    return MCDisassembler::Fail;
  } while (false);

  DecodeStatus Status = MCDisassembler::Success;

  decodeImmOperands(MI, *MCII);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
    if (isMacDPP(MI))
      convertMacDPPInst(MI);

    if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
      convertVOP3PDPPInst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
      convertVOPCDPPInst(MI); // Special VOP3 case
    else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
      convertVOPC64DPPInst(MI); // Special VOP3 case
    else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
             -1)
      convertDPP8Inst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
      convertVOP3DPPInst(MI); // Regular VOP3 case
  }

  convertTrue16OpSel(MI);

  if (AMDGPU::isMAC(MI.getOpcode())) {
    // Insert a dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
    // Insert a dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
      !AMDGPU::hasGDS(STI)) {
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if ((MCII->get(MI.getOpcode()).TSFlags &
       (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto *TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  // Validate buffer instruction offsets for GFX12+; they must not be negative.
  if (isGFX12Plus() && (MCII->get(MI.getOpcode()).TSFlags &
                        (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
    int OffsetIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
    if (OffsetIdx != -1) {
      uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
      int64_t SignedOffset = SignExtend64<24>(Imm);
      if (SignedOffset < 0)
        return MCDisassembler::Fail;
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto *SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
        MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
      }
      Bytes = Bytes.slice(4 * NSAWords);
    }

    convertMIMGInst(MI);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
    convertMIMGInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
    convertEXPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
    convertVINTERPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
    convertSDWAInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
    convertMAIInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
    convertWMMAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                                                              MCOI::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() !=
                           MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
                           MCOperand::createReg(MI.getOperand(Tied).getReg()),
                           AMDGPU::OpName::vdst_in);
    }
  }

  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
  if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm) && !IsSOPK)
    convertFMAanyK(MI);

  // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
  // have EXEC as implicit destination. Issue a warning if the encoding for
  // vdst is not EXEC.
  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
      MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
    auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
    if (Bytes_[0] != ExecEncoding)
      Status = MCDisassembler::SoftFail;
  }

  Size = MaxInstBytesNum - Bytes.size();
  return Status;
}

void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
  }
}

void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
}

/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
/// appropriate subregister for the used format width.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI,
                                        MCOperand &MO, uint8_t NumRegs) {
  switch (NumRegs) {
  case 4:
    return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
  case 6:
    return MO.setReg(
        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
  case 8:
    if (MCRegister NewReg = MRI.getSubReg(
            MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
      MO.setReg(NewReg);
    }
    return;
  case 12: {
    // There is no 384-bit subreg index defined.
    MCRegister BaseReg = MRI.getSubReg(MO.getReg(), AMDGPU::sub0);
    MCRegister NewReg = MRI.getMatchingSuperReg(
        BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
    return MO.setReg(NewReg);
  }
  case 16:
    // No-op in cases where one operand is still f8/bf8.
    return;
  default:
    llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
  }
}

/// f8f6f4 instructions have different pseudos depending on the used formats. In
/// the disassembler table, we only have the variants with the largest register
/// classes which assume using an fp8/bf8 format for both operands. The actual
/// register class depends on the format in the blgp and cbsz operands. Adjust
/// the register classes depending on the used format.
void AMDGPUDisassembler::convertMAIInst(MCInst &MI) const {
  int BlgpIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
  if (BlgpIdx == -1)
    return;

  int CbszIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);

  unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
  unsigned BLGP = MI.getOperand(BlgpIdx).getImm();

  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
                              AdjustedRegClassOpcode->NumRegsSrcA);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
                              AdjustedRegClassOpcode->NumRegsSrcB);
}

void AMDGPUDisassembler::convertWMMAInst(MCInst &MI) const {
  int FmtAIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
  if (FmtAIdx == -1)
    return;

  int FmtBIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);

  unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
  unsigned FmtB = MI.getOperand(FmtBIdx).getImm();

  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
                              AdjustedRegClassOpcode->NumRegsSrcA);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
                              AdjustedRegClassOpcode->NumRegsSrcB);
}

struct VOPModifiers {
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};

// Reconstruct values of VOP3/VOP3P operands such as op_sel.
// Note that these values do not affect disassembler output,
// so this is only necessary for consistency with src_modifiers.
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }

  return Modifiers;
}
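
// Example (illustrative): with OP_SEL_0 and NEG set in src0_modifiers and
// OP_SEL_1 set in src1_modifiers, a VOP3P query returns
//   OpSel = 0b001, OpSelHi = 0b010, NegLo = 0b001, NegHi = 0b000.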

// Instructions decode the op_sel/suffix bits into the src_modifier
// operands. Copy those bits into the src operands for true16 VGPRs.
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
      OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg())
      continue;
    if (!ConversionRC.contains(Op.getReg()))
      continue;
    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}
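
// Example (illustrative): if src0 decoded to a VGPR_16 register with
// encoding index 5 and src0_modifiers carries OP_SEL_0, the operand is
// remapped to VGPR_16 register 5 * 2 + 1, i.e. the high half v5.h.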

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    (void)DST_IDX;
    return true;
  }

  return false;
}

// Create a dummy old operand and insert a dummy unused src2_modifiers.
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}

void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  } else {
    // Insert dummy unused src modifiers.
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src0_modifiers);

    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src1_modifiers);
  }
}

void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  convertTrue16OpSel(MI);

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

// Given a wide tuple \p Reg check if it will overflow 256 registers.
// \returns \p Reg on success or NoRegister otherwise.
static unsigned CheckVGPROverflow(unsigned Reg, const MCRegisterClass &RC,
                                  const MCRegisterInfo &MRI) {
  unsigned NumRegs = RC.getSizeInBits() / 32;
  MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
  if (!Sub0)
    return Reg;

  MCRegister BaseReg;
  if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::VGPR0;
  else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::AGPR0;

  assert(BaseReg && "Only vector registers expected");

  return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : AMDGPU::NoRegister;
}
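
// Example (illustrative): widening to a 4-dword tuple based at v254 would
// need v[254:257]; since 254 + 4 > 256 this returns NoRegister, whereas
// the same tuple based at v252 (v[252:255]) is accepted.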

// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as
// if it had 1 dword, which may not actually be the case.
void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
    // VIMAGE insts other than BVH never use vaddr4.
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return;
        }
        IsPartialNSA = true;
      }
    }
  }

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize,
                            AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the register to the correct number of enabled channels.
  MCRegister NewVdata;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    // Get first subregister of VData
    MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
    MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

    const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
    NewVdata = CheckVGPROverflow(NewVdata, NewRC, MRI);
    if (!NewVdata) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return;
    }
  }

  // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
  // If using partial NSA on GFX11+, widen the last address register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  MCRegister NewVAddrSA;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
    const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
    NewVAddrSA = CheckVGPROverflow(NewVAddrSA, NewRC, MRI);
    if (!NewVAddrSA)
      return;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
}
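
// Worked example (illustrative): dmask = 0b1011 enables three channels,
// so DstSize starts at 3; tfe adds one more dword, while d16 on a
// packed-d16 target folds 3 channels into (3 + 1) / 2 = 2 dwords before
// the opcode is rewritten to the variant with matching VDataDwords.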

// Opsel and neg bits are used in src_modifiers and standalone operands. The
// autogenerated decoder only adds to src_modifiers, so manually add the bits
// to the other operands.
void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);
}

// Create a dummy old operand and insert optional operands.
void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
}

void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  convertTrue16OpSel(MI);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertFMAanyK(MCInst &MI) const {
  assert(HasLiteral && "Should have decoded a literal");
  insertNamedMCOperand(MI, MCOperand::createImm(Literal), AMDGPU::OpName::immX);
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can; let the assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
                                                  bool IsHi) const {
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
  return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
}

// Decode literals for insts which always have a literal in the encoding.
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    assert(AMDGPU::hasVOPD(STI) &&
           "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}

MCOperand
AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
  if (HasLiteral) {
    if (Literal64 != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Literal64 = Val;
  return MCOperand::createImm(Literal64);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
  // For now all literal constants are supposed to be unsigned integers.
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = Literal64 = eatBytes<uint32_t>(Bytes);
    if (ExtendFP64)
      Literal64 <<= 32;
  }
  return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
}

MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const {
  assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));

  if (!HasLiteral) {
    if (Bytes.size() < 8) {
      return errOperand(0, "cannot read literal64, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal64 = eatBytes<uint64_t>(Bytes);
  }
  return MCOperand::createImm(Literal64);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
      // Cast prevents negative overflow.
}
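
// Worked example (illustrative): encodings 128..192 map to 0..64 and
// 193..208 map to -1..-16, so decodeIntImmed(130) yields 2 and
// decodeIntImmed(195) yields 192 - 195 = -3.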

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint32_t>(0.5f);
  case 241:
    return llvm::bit_cast<uint32_t>(-0.5f);
  case 242:
    return llvm::bit_cast<uint32_t>(1.0f);
  case 243:
    return llvm::bit_cast<uint32_t>(-1.0f);
  case 244:
    return llvm::bit_cast<uint32_t>(2.0f);
  case 245:
    return llvm::bit_cast<uint32_t>(-2.0f);
  case 246:
    return llvm::bit_cast<uint32_t>(4.0f);
  case 247:
    return llvm::bit_cast<uint32_t>(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint64_t>(0.5);
  case 241:
    return llvm::bit_cast<uint64_t>(-0.5);
  case 242:
    return llvm::bit_cast<uint64_t>(1.0);
  case 243:
    return llvm::bit_cast<uint64_t>(-1.0);
  case 244:
    return llvm::bit_cast<uint64_t>(2.0);
  case 245:
    return llvm::bit_cast<uint64_t>(-2.0);
  case 246:
    return llvm::bit_cast<uint64_t>(4.0);
  case 247:
    return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValBF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3F00;
  case 241:
    return 0xBF00;
  case 242:
    return 0x3F80;
  case 243:
    return 0xBF80;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4080;
  case 247:
    return 0xC080;
  case 248: // 1 / (2 * PI)
    return 0x3E22;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}
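
// The four tables above encode the same eight constants (+-0.5, +-1.0,
// +-2.0, +-4.0 and 1/(2*pi)) in each format's bit pattern; e.g. encoding
// 240 (0.5) is 0x3800 in f16, 0x3F00 in bf16, 0x3F000000 in f32 and
// 0x3FE0000000000000 in f64.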

unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return VGPR_32RegClassID;
  case 64:
    return VReg_64RegClassID;
  case 96:
    return VReg_96RegClassID;
  case 128:
    return VReg_128RegClassID;
  case 160:
    return VReg_160RegClassID;
  case 192:
    return VReg_192RegClassID;
  case 256:
    return VReg_256RegClassID;
  case 288:
    return VReg_288RegClassID;
  case 320:
    return VReg_320RegClassID;
  case 352:
    return VReg_352RegClassID;
  case 384:
    return VReg_384RegClassID;
  case 512:
    return VReg_512RegClassID;
  case 1024:
    return VReg_1024RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return AGPR_32RegClassID;
  case 64:
    return AReg_64RegClassID;
  case 96:
    return AReg_96RegClassID;
  case 128:
    return AReg_128RegClassID;
  case 160:
    return AReg_160RegClassID;
  case 256:
    return AReg_256RegClassID;
  case 288:
    return AReg_288RegClassID;
  case 320:
    return AReg_320RegClassID;
  case 352:
    return AReg_352RegClassID;
  case 384:
    return AReg_384RegClassID;
  case 512:
    return AReg_512RegClassID;
  case 1024:
    return AReg_1024RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return SGPR_32RegClassID;
  case 64:
    return SGPR_64RegClassID;
  case 96:
    return SGPR_96RegClassID;
  case 128:
    return SGPR_128RegClassID;
  case 160:
    return SGPR_160RegClassID;
  case 256:
    return SGPR_256RegClassID;
  case 288:
    return SGPR_288RegClassID;
  case 320:
    return SGPR_320RegClassID;
  case 352:
    return SGPR_352RegClassID;
  case 384:
    return SGPR_384RegClassID;
  case 512:
    return SGPR_512RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return TTMP_32RegClassID;
  case 64:
    return TTMP_64RegClassID;
  case 128:
    return TTMP_128RegClassID;
  case 256:
    return TTMP_256RegClassID;
  case 288:
    return TTMP_288RegClassID;
  case 320:
    return TTMP_320RegClassID;
  case 352:
    return TTMP_352RegClassID;
  case 384:
    return TTMP_384RegClassID;
  case 512:
    return TTMP_512RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}
1815
1816int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1817 using namespace AMDGPU::EncValues;
1818
1819 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1820 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1821
1822 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1823}
1824
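// [Illustrative sketch, not part of the original source] The ttmp registers
// occupy one contiguous band of the scalar encoding, so the index is a plain
// offset from the band's base. Assuming the gfx9+ values TTMP_GFX9PLUS_MIN ==
// 108 and TTMP_GFX9PLUS_MAX == 123 (their SIDefines.h values at the time of
// writing), a standalone equivalent of getTTmpIdx is:
static int ttmpIdxGfx9PlusSketch(unsigned Val) {
  const unsigned TTmpMin = 108, TTmpMax = 123; // assumed encoding band
  return (TTmpMin <= Val && Val <= TTmpMax) ? int(Val - TTmpMin) : -1;
}
// e.g. ttmpIdxGfx9PlusSketch(110) == 2 (ttmp2); ttmpIdxGfx9PlusSketch(50) == -1.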
1825MCOperand AMDGPUDisassembler::decodeSrcOp(unsigned Width, unsigned Val) const {
1826 using namespace AMDGPU::EncValues;
1827
1828 assert(Val < 1024); // enum10
1829
1830 bool IsAGPR = Val & 512;
1831 Val &= 511;
1832
1833 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1834 return createRegOperand(IsAGPR ? getAgprClassId(Width)
1835 : getVgprClassId(Width), Val - VGPR_MIN);
1836 }
1837 return decodeNonVGPRSrcOp(Width, Val & 0xFF);
1838}
1839
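// [Illustrative sketch, not part of the original source] decodeSrcOp splits
// the 10-bit "enum10" source field: bit 9 selects the AGPR file, and the low
// nine bits are the classic source encoding in which values 256..511 address
// VGPRs (VGPR_MIN == 256 assumed). A standalone restatement of the split:
struct SrcSplitSketch {
  bool IsAGPR;
  bool IsVGPR;
  unsigned Payload; // register index for (A)GPRs, raw encoding otherwise
};
static SrcSplitSketch splitEnum10Sketch(unsigned Val) {
  bool IsAGPR = Val & 512; // bit 9
  Val &= 511;              // keep the 9-bit encoding
  if (Val >= 256)
    return {IsAGPR, !IsAGPR, Val - 256}; // v0..v255 or a0..a255
  return {false, false, Val}; // SGPR/ttmp/inline constant, handled below
}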
1840MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(unsigned Width,
1841 unsigned Val) const {
1842 // Cases where Val{8} is 1 (VGPR, AGPR, or true16 VGPR) should have been
1843 // decoded earlier.
1844 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1845 using namespace AMDGPU::EncValues;
1846
1847 if (Val <= SGPR_MAX) {
1848 // "SGPR_MIN <= Val" is always true and would cause a compilation warning.
1849 static_assert(SGPR_MIN == 0);
1850 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1851 }
1852
1853 int TTmpIdx = getTTmpIdx(Val);
1854 if (TTmpIdx >= 0) {
1855 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1856 }
1857
1858 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
1859 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
1860 Val == LITERAL_CONST)
1861 return MCOperand::createImm(Val);
1862
1863 if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
1864 return decodeLiteral64Constant();
1865 }
1866
1867 switch (Width) {
1868 case 32:
1869 case 16:
1870 return decodeSpecialReg32(Val);
1871 case 64:
1872 return decodeSpecialReg64(Val);
1873 case 96:
1874 case 128:
1875 case 256:
1876 case 512:
1877 return decodeSpecialReg96Plus(Val);
1878 default:
1879 llvm_unreachable("unexpected immediate type");
1880 }
1881}
1882
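// [Illustrative sketch, not part of the original source] The ranges tested in
// decodeNonVGPRSrcOp carry their encodings straight through as immediates:
// with the usual SIDefines.h values (assumed), 128..208 are inline integers
// (128 -> 0, 129..192 -> 1..64, 193..208 -> -1..-16), 240..248 are the inline
// floats tabulated earlier, and 255 requests a trailing 32-bit literal. The
// integer mapping, restated standalone:
static int64_t decodeInlineIntSketch(unsigned Val) {
  assert(128 <= Val && Val <= 208); // INLINE_INTEGER_C_{MIN,MAX}, assumed
  return Val <= 192 ? int64_t(Val) - 128 : int64_t(192) - Val;
}
// decodeInlineIntSketch(129) == 1, decodeInlineIntSketch(193) == -1.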
1883// Bit 0 of DstY isn't stored in the instruction, because it's always the
1884// opposite of bit 0 of DstX.
1885MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
1886 unsigned Val) const {
1887 int VDstXInd =
1888 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1889 assert(VDstXInd != -1);
1890 assert(Inst.getOperand(VDstXInd).isReg());
1891 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
1892 Val |= ~XDstReg & 1;
1893 return createRegOperand(getVgprClassId(32), Val);
1894}
1895
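// [Illustrative sketch, not part of the original source] VOPD stores only
// bits 8..1 of vdstY; bit 0 is reconstructed as the complement of vdstX's
// bit 0, so the two halves of a dual-issue pair always write VGPRs of
// opposite parity:
static unsigned reconstructDstYSketch(unsigned StoredVal, unsigned XDstEnc) {
  return StoredVal | (~XDstEnc & 1); // StoredVal arrives with bit 0 clear
}
// If vdstX is v4 (even encoding), vdstY's low bit is forced to 1 (odd VGPR).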
1896MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
1897 using namespace AMDGPU;
1898
1899 switch (Val) {
1900 // clang-format off
1901 case 102: return createRegOperand(FLAT_SCR_LO);
1902 case 103: return createRegOperand(FLAT_SCR_HI);
1903 case 104: return createRegOperand(XNACK_MASK_LO);
1904 case 105: return createRegOperand(XNACK_MASK_HI);
1905 case 106: return createRegOperand(VCC_LO);
1906 case 107: return createRegOperand(VCC_HI);
1907 case 108: return createRegOperand(TBA_LO);
1908 case 109: return createRegOperand(TBA_HI);
1909 case 110: return createRegOperand(TMA_LO);
1910 case 111: return createRegOperand(TMA_HI);
1911 case 124:
1912 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
1913 case 125:
1914 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
1915 case 126: return createRegOperand(EXEC_LO);
1916 case 127: return createRegOperand(EXEC_HI);
1917 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
1918 case 231: return createRegOperand(SRC_FLAT_SCRATCH_BASE_HI);
1919 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
1920 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
1921 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
1922 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
1923 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1924 case 251: return createRegOperand(SRC_VCCZ);
1925 case 252: return createRegOperand(SRC_EXECZ);
1926 case 253: return createRegOperand(SRC_SCC);
1927 case 254: return createRegOperand(LDS_DIRECT);
1928 default: break;
1929 // clang-format on
1930 }
1931 return errOperand(Val, "unknown operand encoding " + Twine(Val));
1932}
1933
1934MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
1935 using namespace AMDGPU;
1936
1937 switch (Val) {
1938 case 102: return createRegOperand(FLAT_SCR);
1939 case 104: return createRegOperand(XNACK_MASK);
1940 case 106: return createRegOperand(VCC);
1941 case 108: return createRegOperand(TBA);
1942 case 110: return createRegOperand(TMA);
1943 case 124:
1944 if (isGFX11Plus())
1945 return createRegOperand(SGPR_NULL);
1946 break;
1947 case 125:
1948 if (!isGFX11Plus())
1949 return createRegOperand(SGPR_NULL);
1950 break;
1951 case 126: return createRegOperand(EXEC);
1952 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
1953 case 235: return createRegOperand(SRC_SHARED_BASE);
1954 case 236: return createRegOperand(SRC_SHARED_LIMIT);
1955 case 237: return createRegOperand(SRC_PRIVATE_BASE);
1956 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
1957 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1958 case 251: return createRegOperand(SRC_VCCZ);
1959 case 252: return createRegOperand(SRC_EXECZ);
1960 case 253: return createRegOperand(SRC_SCC);
1961 default: break;
1962 }
1963 return errOperand(Val, "unknown operand encoding " + Twine(Val));
1964}
1965
1966MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
1967 using namespace AMDGPU;
1968
1969 switch (Val) {
1970 case 124:
1971 if (isGFX11Plus())
1972 return createRegOperand(SGPR_NULL);
1973 break;
1974 case 125:
1975 if (!isGFX11Plus())
1976 return createRegOperand(SGPR_NULL);
1977 break;
1978 default:
1979 break;
1980 }
1981 return errOperand(Val, "unknown operand encoding " + Twine(Val));
1982}
1983
1984MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
1985 const unsigned Val) const {
1986 using namespace AMDGPU::SDWA;
1987 using namespace AMDGPU::EncValues;
1988
1989 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
1990 STI.hasFeature(AMDGPU::FeatureGFX10)) {
1991 // XXX: The cast to int is needed to avoid a compiler warning that the
1992 // comparison against an unsigned lower bound of zero is always true.
1993 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
1994 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
1995 return createRegOperand(getVgprClassId(Width),
1996 Val - SDWA9EncValues::SRC_VGPR_MIN);
1997 }
1998 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
1999 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2000 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2001 return createSRegOperand(getSgprClassId(Width),
2002 Val - SDWA9EncValues::SRC_SGPR_MIN);
2003 }
2004 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2005 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2006 return createSRegOperand(getTtmpClassId(Width),
2007 Val - SDWA9EncValues::SRC_TTMP_MIN);
2008 }
2009
2010 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2011
2012 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2013 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2014 return MCOperand::createImm(SVal);
2015
2016 return decodeSpecialReg32(SVal);
2017 }
2018 if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
2019 return createRegOperand(getVgprClassId(Width), Val);
2020 llvm_unreachable("unsupported target");
2021}
2022
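// [Illustrative sketch, not part of the original source] The SDWA9 src field
// is the 9-bit scalar encoding biased by SRC_SGPR_MIN (256, assumed): values
// below 256 are VGPRs, and subtracting the bias recovers the plain encoding
// that the inline-constant ranges and decodeSpecialReg32 understand:
static unsigned sdwa9ScalarEncodingSketch(unsigned Val) {
  assert(Val >= 256 && "values 0..255 are VGPRs");
  return Val - 256; // e.g. 256+106 -> vcc_lo, 256+129 -> inline constant 1
}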
2023MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
2024 return decodeSDWASrc(16, Val);
2025}
2026
2027MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
2028 return decodeSDWASrc(32, Val);
2029}
2030
2031MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
2032 using namespace AMDGPU::SDWA;
2033
2034 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
2035 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2036 "SDWAVopcDst should be present only on GFX9+");
2037
2038 bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
2039
2040 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
2041 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2042
2043 int TTmpIdx = getTTmpIdx(Val);
2044 if (TTmpIdx >= 0) {
2045 auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
2046 return createSRegOperand(TTmpClsId, TTmpIdx);
2047 }
2048 if (Val > SGPR_MAX) {
2049 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
2050 }
2051 return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
2052 }
2053 return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
2054}
2055
2056MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
2057 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32) ? decodeSrcOp(32, Val)
2058 : decodeSrcOp(64, Val);
2059}
2060
2061MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
2062 return decodeSrcOp(32, Val);
2063}
2064
2065MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
2066 if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
2067 return MCOperand();
2068 return MCOperand::createImm(Val);
2069}
2070
2071MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
2072 using VersionField = AMDGPU::EncodingField<7, 0>;
2073 using W64Bit = AMDGPU::EncodingBit<13>;
2074 using W32Bit = AMDGPU::EncodingBit<14>;
2075 using MDPBit = AMDGPU::EncodingBit<15>;
2076 using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;
2077
2078 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
2079
2080 // Decode into a plain immediate if any unused bits are raised.
2081 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
2082 return MCOperand::createImm(Imm);
2083
2084 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
2085 const auto *I = find_if(
2086 Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
2087 return V.Code == Version;
2088 });
2089 MCContext &Ctx = getContext();
2090 const MCExpr *E;
2091 if (I == Versions.end())
2092 E = MCConstantExpr::create(Version, Ctx);
2093 else
2094 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
2095
2096 if (W64)
2097 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
2098 if (W32)
2099 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
2100 if (MDP)
2101 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
2102
2103 return MCOperand::createExpr(E);
2104}
2105
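// [Illustrative sketch, not part of the original source] The same field
// layout decodeVersionImm uses, restated with plain masks; bits 8..12 must be
// zero or the operand falls back to a bare immediate:
struct UCVersionSketch {
  unsigned Version;
  bool W64, W32, MDP;
};
static UCVersionSketch decodeUCVersionSketch(unsigned Imm) {
  return {Imm & 0xFF, bool(Imm & (1u << 13)), bool(Imm & (1u << 14)),
          bool(Imm & (1u << 15))};
}
// decodeUCVersionSketch(0x4005) -> {5, false, true, false}: version code 5
// OR'd with UC_VERSION_W32_BIT in the printed expression.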
2106bool AMDGPUDisassembler::isVI() const {
2107 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2108}
2109
2110bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
2111
2112bool AMDGPUDisassembler::isGFX90A() const {
2113 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2114}
2115
2116bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
2117
2118bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
2119
2120bool AMDGPUDisassembler::isGFX10Plus() const {
2121 return AMDGPU::isGFX10Plus(STI);
2122}
2123
2124bool AMDGPUDisassembler::isGFX11() const {
2125 return STI.hasFeature(AMDGPU::FeatureGFX11);
2126}
2127
2128bool AMDGPUDisassembler::isGFX11Plus() const {
2129 return AMDGPU::isGFX11Plus(STI);
2130}
2131
2132bool AMDGPUDisassembler::isGFX12() const {
2133 return STI.hasFeature(AMDGPU::FeatureGFX12);
2134}
2135
2136bool AMDGPUDisassembler::isGFX12Plus() const {
2137 return AMDGPU::isGFX12Plus(STI);
2138}
2139
2140bool AMDGPUDisassembler::isGFX1250() const { return AMDGPU::isGFX1250(STI); }
2141
2142bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
2143 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2144}
2145
2146bool AMDGPUDisassembler::hasKernargPreload() const {
2147 return AMDGPU::hasKernargPreload(STI);
2148}
2149
2150//===----------------------------------------------------------------------===//
2151// AMDGPU specific symbol handling
2152//===----------------------------------------------------------------------===//
2153
2154/// Print a string describing the reserved bit range specified by Mask with
2155/// offset BaseBytes for use in error comments. Mask is a single continuous
2156/// range of 1s surrounded by zeros. The format here is meant to align with the
2157/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2158static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2159 SmallString<32> Result;
2160 raw_svector_ostream S(Result);
2161
2162 int TrailingZeros = llvm::countr_zero(Mask);
2163 int PopCount = llvm::popcount(Mask);
2164
2165 if (PopCount == 1) {
2166 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2167 } else {
2168 S << "bits in range ("
2169 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2170 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2171 }
2172
2173 return Result;
2174}
2175
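// [Illustrative sketch, not part of the original source] Two sample outputs,
// in the AMDGPUUsage table style the comment above refers to:
static void bitRangeExamplesSketch() {
  assert(getBitRangeFromMask(0x40, 0).str() == "bit (6)");
  // 0xC0 with a 4-byte base offset covers bits 38..39 of the descriptor.
  assert(getBitRangeFromMask(0xC0, 4).str() == "bits in range (39:38)");
}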
2176#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2177#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2178 do { \
2179 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2180 } while (0)
2181#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2182 do { \
2183 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2184 << GET_FIELD(MASK) << '\n'; \
2185 } while (0)
2186
2187#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2188 do { \
2189 if (FourByteBuffer & (MASK)) { \
2190 return createStringError(std::errc::invalid_argument, \
2191 "kernel descriptor " DESC \
2192 " reserved %s set" MSG, \
2193 getBitRangeFromMask((MASK), 0).c_str()); \
2194 } \
2195 } while (0)
2196
2197#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2198#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2199 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2200#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2201 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2202#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2203 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2204
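// [Illustrative expansion added by the editor; not part of the original
// source] One concrete trace through the macro chain above:
// CHECK_RESERVED_BITS_MSG(FOO, "must be zero") goes through
// CHECK_RESERVED_BITS_IMPL(FOO, "FOO", ", must be zero") and emits:
//
//   do {
//     if (FourByteBuffer & (FOO)) {
//       return createStringError(std::errc::invalid_argument,
//                                "kernel descriptor FOO reserved %s set"
//                                ", must be zero",
//                                getBitRangeFromMask((FOO), 0).c_str());
//     }
//   } while (0);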
2205// NOLINTNEXTLINE(readability-identifier-naming)
2206Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
2207 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2208 using namespace amdhsa;
2209 StringRef Indent = "\t";
2210
2211 // We cannot accurately backward compute #VGPRs used from
2212 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2213 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2214 // simply calculate the inverse of what the assembler does.
2215
2216 uint32_t GranulatedWorkitemVGPRCount =
2217 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2218
2219 uint32_t NextFreeVGPR =
2220 (GranulatedWorkitemVGPRCount + 1) *
2221 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
2222
2223 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2224
2225 // We cannot backward compute values used to calculate
2226 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
2227 // directives can't be computed:
2228 // .amdhsa_reserve_vcc
2229 // .amdhsa_reserve_flat_scratch
2230 // .amdhsa_reserve_xnack_mask
2231 // They take their respective default values if not specified in the assembly.
2232 //
2233 // GRANULATED_WAVEFRONT_SGPR_COUNT
2234 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2235 //
2236 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2237 // are set to 0. So while disassembling we consider that:
2238 //
2239 // GRANULATED_WAVEFRONT_SGPR_COUNT
2240 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2241 //
2242 // The disassembler cannot recover the original values of those 3 directives.
2243
2244 uint32_t GranulatedWavefrontSGPRCount =
2245 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2246
2247 if (isGFX10Plus())
2248 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2249 "must be zero on gfx10+");
2250
2251 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2252 AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
2253
2254 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2255 if (!hasArchitectedFlatScratch())
2256 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2257 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
2258 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2259
2260 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2261
2262 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2263 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2264 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2265 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2266 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2267 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2268 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2269 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2270
2271 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2272
2273 if (!isGFX12Plus())
2274 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2275 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2276
2277 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2278
2279 if (!isGFX12Plus())
2280 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2281 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2282
2283 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2284 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2285
2286 // Bits [26].
2287 if (isGFX9Plus()) {
2288 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2289 } else {
2290 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2291 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2292 }
2293
2294 // Bits [27].
2295 if (isGFX1250()) {
2296 PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
2297 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2298 } else {
2299 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
2300 "COMPUTE_PGM_RSRC1");
2301 }
2302
2303 // Bits [28].
2304 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
2305
2306 // Bits [29-31].
2307 if (isGFX10Plus()) {
2308 // WGP_MODE is not available on GFX1250.
2309 if (!isGFX1250()) {
2310 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2311 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2312 }
2313 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2314 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2315 } else {
2316 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
2317 "COMPUTE_PGM_RSRC1");
2318 }
2319
2320 if (isGFX12Plus())
2321 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2322 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2323
2324 return true;
2325}
2326
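// [Illustrative sketch, not part of the original source] The round trip the
// code above relies on: the assembler encodes
//   GRANULATED_WORKITEM_VGPR_COUNT = ceil(NextFreeVGPR / Granule) - 1,
// so (Count + 1) * Granule is the smallest .amdhsa_next_free_vgpr that
// re-encodes to the same field value:
static unsigned nextFreeVGPRSketch(unsigned GranulatedCount, unsigned Granule) {
  return (GranulatedCount + 1) * Granule;
}
// With an (assumed) granule of 4, an encoded count of 2 prints as
// .amdhsa_next_free_vgpr 12; any original value in 9..12 re-encodes to 2.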
2327// NOLINTNEXTLINE(readability-identifier-naming)
2328Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
2329 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2330 using namespace amdhsa;
2331 StringRef Indent = "\t";
2332 if (isGFX12Plus())
2333 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2334 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2335 else
2336 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2337 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2338 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2339 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2340 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2341 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2342 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2343 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2344 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2345 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2346 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2347 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2348
2349 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2350 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2351 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2352
2354 ".amdhsa_exception_fp_ieee_invalid_op",
2355 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2356 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2357 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2359 ".amdhsa_exception_fp_ieee_div_zero",
2360 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2361 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2362 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2363 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2364 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2365 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2366 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2367 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2368 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2369
2370 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2371
2372 return true;
2373}
2374
2375// NOLINTNEXTLINE(readability-identifier-naming)
2376Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2377 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2378 using namespace amdhsa;
2379 StringRef Indent = "\t";
2380 if (isGFX90A()) {
2381 KdStream << Indent << ".amdhsa_accum_offset "
2382 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2383 << '\n';
2384
2385 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2386
2387 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2388 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2389 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2390 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2391 } else if (isGFX10Plus()) {
2392 // Bits [0-3].
2393 if (!isGFX12Plus()) {
2394 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2395 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2396 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2397 } else {
2399 "SHARED_VGPR_COUNT",
2400 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2401 }
2402 } else {
2403 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2404 "COMPUTE_PGM_RSRC3",
2405 "must be zero on gfx12+");
2406 }
2407
2408 // Bits [4-11].
2409 if (isGFX11()) {
2410 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2411 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2412 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2413 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2414 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2415 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2416 } else if (isGFX12Plus()) {
2417 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2418 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2419 } else {
2420 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2421 "COMPUTE_PGM_RSRC3",
2422 "must be zero on gfx10");
2423 }
2424
2425 // Bits [12].
2426 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2427 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2428
2429 // Bits [13].
2430 if (isGFX12Plus()) {
2431 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2432 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2433 } else {
2434 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2435 "COMPUTE_PGM_RSRC3",
2436 "must be zero on gfx10 or gfx11");
2437 }
2438
2439 // Bits [14-21].
2440 if (isGFX1250()) {
2441 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2442 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2444 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2445 PRINT_PSEUDO_DIRECTIVE_COMMENT("TCP_SPLIT",
2446 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2448 "ENABLE_DIDT_THROTTLE",
2449 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2450 } else {
2451 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2452 "COMPUTE_PGM_RSRC3",
2453 "must be zero on gfx10+");
2454 }
2455
2456 // Bits [22-30].
2457 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2458 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2459
2460 // Bits [31].
2461 if (isGFX11Plus()) {
2462 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2463 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2464 } else {
2465 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2466 "COMPUTE_PGM_RSRC3",
2467 "must be zero on gfx10");
2468 }
2469 } else if (FourByteBuffer) {
2470 return createStringError(
2471 std::errc::invalid_argument,
2472 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2473 }
2474 return true;
2475}
2476#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2477#undef PRINT_DIRECTIVE
2478#undef GET_FIELD
2479#undef CHECK_RESERVED_BITS_IMPL
2480#undef CHECK_RESERVED_BITS
2481#undef CHECK_RESERVED_BITS_MSG
2482#undef CHECK_RESERVED_BITS_DESC
2483#undef CHECK_RESERVED_BITS_DESC_MSG
2484
2485/// Create an error object to return from onSymbolStart for reserved kernel
2486/// descriptor bits being set.
2487static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2488 const char *Msg = "") {
2489 return createStringError(
2490 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2491 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2492}
2493
2494/// Create an error object to return from onSymbolStart for reserved kernel
2495/// descriptor bytes being set.
2496static Error createReservedKDBytesError(unsigned BaseInBytes,
2497 unsigned WidthInBytes) {
2498 // Create an error comment in the same format as the "Kernel Descriptor"
2499 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2500 return createStringError(
2501 std::errc::invalid_argument,
2502 "kernel descriptor reserved bits in range (%u:%u) set",
2503 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2504}
2505
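// [Illustrative sketch, not part of the original source] The bit bounds the
// error above prints: 4 reserved bytes at descriptor offset 8 report
// "bits in range (95:64)". Restated standalone (std::pair from <utility>,
// available transitively here):
static std::pair<unsigned, unsigned>
reservedBitRangeSketch(unsigned BaseInBytes, unsigned WidthInBytes) {
  return {(BaseInBytes + WidthInBytes) * 8 - 1, BaseInBytes * 8};
}
// reservedBitRangeSketch(8, 4) == {95, 64}.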
2506Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2507 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2508 raw_string_ostream &KdStream) const {
2509#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2510 do { \
2511 KdStream << Indent << DIRECTIVE " " \
2512 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2513 } while (0)
2514
2515 uint16_t TwoByteBuffer = 0;
2516 uint32_t FourByteBuffer = 0;
2517
2518 StringRef ReservedBytes;
2519 StringRef Indent = "\t";
2520
2521 assert(Bytes.size() == 64);
2522 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2523
2524 switch (Cursor.tell()) {
2525 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2526 FourByteBuffer = DE.getU32(Cursor);
2527 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2528 << '\n';
2529 return true;
2530
2531 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2532 FourByteBuffer = DE.getU32(Cursor);
2533 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2534 << FourByteBuffer << '\n';
2535 return true;
2536
2537 case amdhsa::KERNARG_SIZE_OFFSET:
2538 FourByteBuffer = DE.getU32(Cursor);
2539 KdStream << Indent << ".amdhsa_kernarg_size "
2540 << FourByteBuffer << '\n';
2541 return true;
2542
2543 case amdhsa::RESERVED0_OFFSET:
2544 // 4 reserved bytes, must be 0.
2545 ReservedBytes = DE.getBytes(Cursor, 4);
2546 for (int I = 0; I < 4; ++I) {
2547 if (ReservedBytes[I] != 0)
2548 return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
2549 }
2550 return true;
2551
2552 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2553 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2554 // So far no directive controls this for Code Object V3, so simply skip it
2555 // during disassembly.
2556 DE.skip(Cursor, 8);
2557 return true;
2558
2559 case amdhsa::RESERVED1_OFFSET:
2560 // 20 reserved bytes, must be 0.
2561 ReservedBytes = DE.getBytes(Cursor, 20);
2562 for (int I = 0; I < 20; ++I) {
2563 if (ReservedBytes[I] != 0)
2564 return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
2565 }
2566 return true;
2567
2568 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2569 FourByteBuffer = DE.getU32(Cursor);
2570 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2571
2572 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2573 FourByteBuffer = DE.getU32(Cursor);
2574 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2575
2576 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2577 FourByteBuffer = DE.getU32(Cursor);
2578 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2579
2580 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2581 using namespace amdhsa;
2582 TwoByteBuffer = DE.getU16(Cursor);
2583
2584 if (!hasArchitectedFlatScratch())
2585 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2586 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2587 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2588 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2589 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2590 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2591 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2592 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2593 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2594 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2595 if (!hasArchitectedFlatScratch())
2596 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2597 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2598 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2599 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2600
2601 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2602 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2603 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2604
2605 // Reserved for GFX9
2606 if (isGFX9() &&
2607 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2608 return createReservedKDBitsError(
2609 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2610 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2611 }
2612 if (isGFX10Plus()) {
2613 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2614 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2615 }
2616
2617 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2618 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2619 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2620
2621 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2622 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2623 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2624 }
2625
2626 return true;
2627
2628 case amdhsa::KERNARG_PRELOAD_OFFSET:
2629 using namespace amdhsa;
2630 TwoByteBuffer = DE.getU16(Cursor);
2631 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2632 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2633 KERNARG_PRELOAD_SPEC_LENGTH);
2634 }
2635
2636 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2637 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2638 KERNARG_PRELOAD_SPEC_OFFSET);
2639 }
2640 return true;
2641
2642 case amdhsa::RESERVED3_OFFSET:
2643 // 4 bytes from here are reserved, must be 0.
2644 ReservedBytes = DE.getBytes(Cursor, 4);
2645 for (int I = 0; I < 4; ++I) {
2646 if (ReservedBytes[I] != 0)
2647 return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
2648 }
2649 return true;
2650
2651 default:
2652 llvm_unreachable("Unhandled index. Case statements cover everything.");
2653 return true;
2654 }
2655#undef PRINT_DIRECTIVE
2656}
2657
2658Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2659 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2660
2661 // CP microcode requires the kernel descriptor to be 64-byte aligned.
2662 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2663 return createStringError(std::errc::invalid_argument,
2664 "kernel descriptor must be 64-byte aligned");
2665
2666 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2667 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2668 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2669 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2670 // when required.
2671 if (isGFX10Plus()) {
2672 uint16_t KernelCodeProperties =
2673 support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2674 llvm::endianness::little);
2675 EnableWavefrontSize32 =
2676 AMDHSA_BITS_GET(KernelCodeProperties,
2677 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2678 }
2679
2680 std::string Kd;
2681 raw_string_ostream KdStream(Kd);
2682 KdStream << ".amdhsa_kernel " << KdName << '\n';
2683
2684 DataExtractor::Cursor C(0);
2685 while (C && C.tell() < Bytes.size()) {
2686 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2687
2688 cantFail(C.takeError());
2689
2690 if (!Res)
2691 return Res;
2692 }
2693 KdStream << ".end_amdhsa_kernel\n";
2694 outs() << KdStream.str();
2695 return true;
2696}
2697
2698Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2699 uint64_t &Size,
2700 ArrayRef<uint8_t> Bytes,
2701 uint64_t Address) const {
2702 // Right now only the kernel descriptor needs to be handled.
2703 // We ignore all other symbols for target specific handling.
2704 // TODO:
2705 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2706 // Object V2 and V3 when symbols are marked protected.
2707
2708 // amd_kernel_code_t for Code Object V2.
2709 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2710 Size = 256;
2711 return createStringError(std::errc::invalid_argument,
2712 "code object v2 is not supported");
2713 }
2714
2715 // Code Object V3 kernel descriptors.
2716 StringRef Name = Symbol.Name;
2717 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2718 Size = 64; // Size = 64 regardless of success or failure.
2719 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2720 }
2721
2722 return false;
2723}
2724
2725const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2726 int64_t Val) {
2727 MCContext &Ctx = getContext();
2728 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2729 // Note: only set the value to Val on a new symbol, in case a disassembler
2730 // has already been initialized in this context.
2731 if (!Sym->isVariable()) {
2732 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2733 } else {
2734 int64_t Res = ~Val;
2735 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2736 if (!Valid || Res != Val)
2737 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2738 }
2739 return MCSymbolRefExpr::create(Sym, Ctx);
2740}
2741
2742bool AMDGPUDisassembler::isBufferInstruction(const MCInst &MI) const {
2743 const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
2744
2745 // Check for MUBUF and MTBUF instructions
2746 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2747 return true;
2748
2749 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2750 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(MI.getOpcode()))
2751 return true;
2752
2753 return false;
2754}
2755
2756//===----------------------------------------------------------------------===//
2757// AMDGPUSymbolizer
2758//===----------------------------------------------------------------------===//
2759
2760// Try to find symbol name for specified label
2761bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2762 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2763 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2764 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2765
2766 if (!IsBranch) {
2767 return false;
2768 }
2769
2770 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2771 if (!Symbols)
2772 return false;
2773
2774 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2775 return Val.Addr == static_cast<uint64_t>(Value) &&
2776 Val.Type == ELF::STT_NOTYPE;
2777 });
2778 if (Result != Symbols->end()) {
2779 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2780 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2781 Inst.addOperand(MCOperand::createExpr(Add));
2782 return true;
2783 }
2784 // Add to the list of referenced addresses so the caller can synthesize a label.
2785 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2786 return false;
2787}
2788
2789void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2790 int64_t Value,
2791 uint64_t Address) {
2792 llvm_unreachable("unimplemented");
2793}
2794
2795//===----------------------------------------------------------------------===//
2796// Initialization
2797//===----------------------------------------------------------------------===//
2798
2799static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2800 LLVMOpInfoCallback /*GetOpInfo*/,
2801 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2802 void *DisInfo,
2803 MCContext *Ctx,
2804 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2805 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2806}
2807
2808static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2809 const MCSubtargetInfo &STI,
2810 MCContext &Ctx) {
2811 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2812}
2813
2814extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
2815LLVMInitializeAMDGPUDisassembler() {
2816 TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2817 createAMDGPUDisassembler);
2818 TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2819 createAMDGPUSymbolizer);
2820}