//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoder.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"

using namespace llvm;
using namespace llvm::MCD;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

static int64_t getInlineImmValF16(unsigned Imm);
static int64_t getInlineImmValBF16(unsigned Imm);
static int64_t getInlineImmVal32(unsigned Imm);
static int64_t getInlineImmVal64(unsigned Imm);
AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    reportFatalUsageError("disassembly not yet supported for subtarget");

  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
}

static DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                AMDGPU::OpName Name) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
  if (OpIdx != -1) {
    auto *I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  // Our branches take a simm16.
  int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
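
// Worked example (illustrative, not part of the original source): with the
// branch word at Addr = 0x100 and Imm = 0xFFFC (simm16 = -4), the target is
// SignExtend64<16>(0xFFFC) * 4 + 4 + 0x100 = -16 + 4 + 0x100 = 0xF4, i.e. the
// offset counts dwords from the end of the 4-byte SOPP instruction.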

static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}
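
// Illustration of the width difference (not from the original source): the
// raw bits 0x1FFFFF decode to -1 under the 21-bit signed GFX9+ rule, but to
// the unsigned offset 0xFFFFF on VI after masking to 20 bits.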

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
}

static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
}

static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
}
#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
                                        uint64_t /*Addr*/, \
                                        const MCDisassembler *Decoder) { \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
    return addOperand(Inst, DAsm->DecoderName(Imm)); \
  }

// Decoder for registers, decoding directly via RegClassID. Imm (8-bit) is the
// register number. Used by VGPR-only and AGPR-only operands.
#define DECODE_OPERAND_REG_8(RegClass) \
  static DecodeStatus Decode##RegClass##RegisterClass( \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
      const MCDisassembler *Decoder) { \
    assert(Imm < (1 << 8) && "8-bit encoding"); \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
    return addOperand( \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
  }
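
// For reference, a hypothetical expansion: DECODE_OPERAND_REG_8(VGPR_32)
// defines DecodeVGPR_32RegisterClass, which forwards the 8-bit register
// number straight to createRegOperand(AMDGPU::VGPR_32RegClassID, Imm).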

#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm) \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
                           const MCDisassembler *Decoder) { \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
    return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm)); \
  }

static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                unsigned OpWidth, unsigned Imm, unsigned EncImm,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm));
}

// Decoder for registers. Imm (7-bit) is the register number; uses decodeSrcOp
// to get the register class. Used by SGPR-only operands.
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth) \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

#define DECODE_OPERAND_SREG_8(RegClass, OpWidth) \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)

// Decoder for registers. Imm (10-bit): Imm{7-0} is the register number,
// Imm{9} is acc (AGPR or VGPR), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR-or-VGPR-only register operands).
template <unsigned OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
                     Decoder);
}
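
// Illustration (derived from the code above; IS_VGPR is assumed to be 256):
// Imm = 0x205 (acc bit Imm{9} set, register number 5) becomes 0x305 after
// OR-ing in IS_VGPR, which decodeSrcOp maps to AGPR a5; Imm = 0x005 becomes
// 0x105 and maps to VGPR v5.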

// Decoder for Src (9-bit encoding) registers only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

// Decoder for Src (9-bit encoding) AGPRs only: the register number is encoded
// in 9 bits; set Imm{9} to 1 (set acc) and decode using 'enum10' from
// decodeSrcOp.
template <unsigned OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}

// Decoder for 'enum10' from decodeSrcOp: Imm{0-8} is the 9-bit Src encoding
// and Imm{9} is acc; registers only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
}

// Decoder for RegisterOperands using the 9-bit Src encoding. The operand can
// be a register from RegClass or an immediate. Registers that don't belong to
// RegClass are still decoded, and the InstPrinter will report a warning. An
// immediate is decoded into a constant matching the OperandType (important
// for floating-point types).
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /* Addr */,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

// Decoder for Src (9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set
// acc) and decode using 'enum10' from decodeSrcOp.
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /* Addr */,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}
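
// Note on the encodings above: OR-ing Imm with 512 sets the acc bit Imm{9},
// so a 9-bit AGPR number lands in the AGPR half of the 'enum10' space, while
// the plain 9-bit forms pass Imm through unchanged.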

// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_192)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_320)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

DECODE_OPERAND_SREG_7(SReg_32, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
DECODE_OPERAND_SREG_7(SReg_96, 96)
DECODE_OPERAND_SREG_7(SReg_128, 128)
DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
DECODE_OPERAND_SREG_7(SReg_256, 256)
DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
DECODE_OPERAND_SREG_7(SReg_512, 512)

DECODE_OPERAND_SREG_8(SReg_64, 64)

DECODE_OPERAND_REG_8(AGPR_32)
DECODE_OPERAND_REG_8(AReg_64)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)

static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
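
// Worked example (illustrative only): Imm = 0x205 has bit 9 set and RegIdx 5,
// so the operand decodes to the high half v5.h; Imm = 0x005 decodes to v5.l.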

static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));
}

template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));
}

static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
}

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
                          const MCRegisterInfo *MRI) {
  if (OpIdx < 0)
    return false;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return false;

  MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
}

static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (!DAsm->isGFX90A()) {
    Imm &= 511;
  } else {
    // If an atomic has both vdata and vdst, their register classes are tied.
    // The bit is decoded along with the vdst, the first operand. We need to
    // change the register class to AGPR if vdst was an AGPR.
    // If a DS instruction has both data0 and data1, their register classes
    // are also tied.
    unsigned Opc = Inst.getOpcode();
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
                                  ? AMDGPU::OpName::data0
                                  : AMDGPU::OpName::vdata;
    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataName);
    if ((int)Inst.getNumOperands() == DataIdx) {
      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (IsAGPROperand(Inst, DstIdx, MRI))
        Imm |= 512;
    }

    if (TSFlags & SIInstrFlags::DS) {
      int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
      if ((int)Inst.getNumOperands() == Data2Idx &&
          IsAGPROperand(Inst, DataIdx, MRI))
        Imm |= 512;
    }
  }
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}
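
// Note (derived from the code above): the final OR with 256 marks the value
// as a register in the 9-bit source encoding, and the conditionally added 512
// moves it from the VGPR range into the AGPR range of decodeSrcOp's enum10.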

template <unsigned Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(64, Imm));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
                                     uint64_t /* Addr */,
                                     const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
}

#include "AMDGPUGenDisassemblerTables.inc"

namespace {
// Define bitwidths for the various types used to instantiate the decoder.
template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
} // namespace

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename InsnType>
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
                                               InsnType Inst, uint64_t Address,
                                               raw_ostream &Comments) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false;
  const auto SavedBytes = Bytes;

  SmallString<64> LocalComments;
  raw_svector_ostream LocalCommentStream(LocalComments);
  CommentStream = &LocalCommentStream;

  DecodeStatus Res =
      decodeInstruction(Table, TmpInst, Inst, Address, this, STI);

  CommentStream = nullptr;

  if (Res != MCDisassembler::Fail) {
    MI = TmpInst;
    Comments << LocalComments;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}

template <typename InsnType>
DecodeStatus
AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
                                  MCInst &MI, InsnType Inst, uint64_t Address,
                                  raw_ostream &Comments) const {
  for (const uint8_t *T : {Table1, Table2}) {
    if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
      return Res;
  }
  return MCDisassembler::Fail;
}

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  using namespace llvm::support::endian;
  assert(Bytes.size() >= 12);
  std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(4);
  return (Hi << 64) | Lo;
}

static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
  using namespace llvm::support::endian;
  assert(Bytes.size() >= 16);
  std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  return (Hi << 64) | Lo;
}
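
// Byte-order sketch (illustrative): for the 16 little-endian bytes
// 00 01 .. 0f, Lo receives bytes 0-7 and Hi receives bytes 8-15, so
// (Hi << 64) | Lo reassembles the full 128-bit instruction word with byte 0
// in the least significant position.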

void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
                                           const MCInstrInfo &MCII) const {
  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
    if (OpNo >= MI.getNumOperands())
      continue;

    // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
    // defined to take VGPR_32, but in reality allowing inline constants.
    bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
                 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
    if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
      continue;

    MCOperand &Op = MI.getOperand(OpNo);
    if (!Op.isImm())
      continue;
    int64_t Imm = Op.getImm();
    if (AMDGPU::EncValues::INLINE_INTEGER_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_INTEGER_C_MAX) {
      Op = decodeIntImmed(Imm);
      continue;
    }

    if (Imm == AMDGPU::EncValues::LITERAL_CONST) {
      Op = decodeLiteralConstant(OpDesc.OperandType ==
                                 AMDGPU::OPERAND_REG_IMM_FP64);
      continue;
    }

    if (AMDGPU::EncValues::INLINE_FLOATING_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX) {
      switch (OpDesc.OperandType) {
      case AMDGPU::OPERAND_REG_IMM_BF16:
      case AMDGPU::OPERAND_REG_IMM_V2BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
        Imm = getInlineImmValBF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_FP16:
      case AMDGPU::OPERAND_REG_IMM_V2FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
        Imm = getInlineImmValF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_FP64:
      case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
        Imm = getInlineImmVal64(Imm);
        break;
      default:
        Imm = getInlineImmVal32(Imm);
      }
      Op.setImm(Imm);
    }
  }
}

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
  // there are fewer bytes left). This will be overridden on success.
  Size = std::min((size_t)4, Bytes_.size());

  do {
    // ToDo: it would be better to switch the encoding length using some bit
    // predicate, but that predicate is not known yet, so try everything we can.

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (isGFX1250() && Bytes.size() >= 16) {
      std::bitset<128> DecW = eat16Bytes(Bytes);
      if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
        break;
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    if (isGFX11Plus() && Bytes.size() >= 12) {
      std::bitset<96> DecW = eat12Bytes(Bytes);

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
        // Return 8 bytes for a potential literal.
        Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);

        if (isGFX1250() &&
            tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
          break;
      }

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);

    } else if (Bytes.size() >= 16 &&
               STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
      std::bitset<128> DecW = eat16Bytes(Bytes);
      if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
          tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
          tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
        break;

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
          tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
          tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
        break;

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
                        QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    // Try to decode a 32-bit instruction
    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
        break;

      if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
          tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                        Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
                        DW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
                        Address, CS))
        break;
    }

    return MCDisassembler::Fail;
  } while (false);

  DecodeStatus Status = MCDisassembler::Success;

  decodeImmOperands(MI, *MCII);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
    if (isMacDPP(MI))
      convertMacDPPInst(MI);

    if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
      convertVOP3PDPPInst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
      convertVOPCDPPInst(MI); // Special VOP3 case
    else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
      convertVOPC64DPPInst(MI); // Special VOP3 case
    else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
             -1)
      convertDPP8Inst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
      convertVOP3DPPInst(MI); // Regular VOP3 case
  }

  if (AMDGPU::isMAC(MI.getOpcode())) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
      !AMDGPU::hasGDS(STI)) {
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if ((MCII->get(MI.getOpcode()).TSFlags &
       (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto *TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  // Validate buffer instruction offsets for GFX12+ - must not be negative.
  if (isGFX12Plus() && (MCII->get(MI.getOpcode()).TSFlags &
                        (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
    int OffsetIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
    if (OffsetIdx != -1) {
      uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
      int64_t SignedOffset = SignExtend64<24>(Imm);
      if (SignedOffset < 0)
        return MCDisassembler::Fail;
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto *SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
        MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
      }
      Bytes = Bytes.slice(4 * NSAWords);
    }

    convertMIMGInst(MI);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
    convertMIMGInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
    convertEXPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
    convertVINTERPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
    convertSDWAInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
    convertMAIInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
    convertWMMAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                                                              MCOI::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() !=
                           MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
                           MCOperand::createReg(MI.getOperand(Tied).getReg()),
                           AMDGPU::OpName::vdst_in);
    }
  }

  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
  if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm) && !IsSOPK)
    convertFMAanyK(MI);

  // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
  // have EXEC as implicit destination. Issue a warning if encoding for
  // vdst is not EXEC.
  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
      MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
    auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
    if (Bytes_[0] != ExecEncoding)
      Status = MCDisassembler::SoftFail;
  }

  Size = MaxInstBytesNum - Bytes.size();
  return Status;
}

void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
  }
}

void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
}

/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
/// appropriate subregister for the used format width.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI,
                                        MCOperand &MO, uint8_t NumRegs) {
  switch (NumRegs) {
  case 4:
    return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
  case 6:
    return MO.setReg(
        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
  case 8:
    if (MCRegister NewReg = MRI.getSubReg(
            MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
      MO.setReg(NewReg);
    }
    return;
  case 12: {
    // There is no 384-bit subreg index defined.
    MCRegister BaseReg = MRI.getSubReg(MO.getReg(), AMDGPU::sub0);
    MCRegister NewReg = MRI.getMatchingSuperReg(
        BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
    return MO.setReg(NewReg);
  }
  case 16:
    // No-op in cases where one operand is still f8/bf8.
    return;
  default:
    llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
  }
}

/// f8f6f4 instructions have different pseudos depending on the used formats.
/// In the disassembler table, we only have the variants with the largest
/// register classes, which assume an fp8/bf8 format for both operands. The
/// actual register class depends on the format in the blgp and cbsz operands.
/// Adjust the register classes depending on the used format.
void AMDGPUDisassembler::convertMAIInst(MCInst &MI) const {
  int BlgpIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
  if (BlgpIdx == -1)
    return;

  int CbszIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);

  unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
  unsigned BLGP = MI.getOperand(BlgpIdx).getImm();

  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
                              AdjustedRegClassOpcode->NumRegsSrcA);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
                              AdjustedRegClassOpcode->NumRegsSrcB);
}

void AMDGPUDisassembler::convertWMMAInst(MCInst &MI) const {
  int FmtAIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
  if (FmtAIdx == -1)
    return;

  int FmtBIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);

  unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
  unsigned FmtB = MI.getOperand(FmtBIdx).getImm();

  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
                              AdjustedRegClassOpcode->NumRegsSrcA);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
                              AdjustedRegClassOpcode->NumRegsSrcB);
}

struct VOPModifiers {
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};

// Reconstruct values of VOP3/VOP3P operands such as op_sel.
// Note that these values do not affect disassembler output,
// so this is only necessary for consistency with src_modifiers.
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }

  return Modifiers;
}
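
// Packing example (illustrative): if src1_modifiers has OP_SEL_0 set, bit 1
// of Modifiers.OpSel is set; with IsVOP3P == false, DST_OP_SEL from
// src0_modifiers lands in bit 3, matching the op_sel operand layout.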

// Instructions decode the op_sel/suffix bits into the src_modifier
// operands. Copy those bits into the src operands for true16 VGPRs.
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
      OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg())
      continue;
    if (!ConversionRC.contains(Op.getReg()))
      continue;
    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    (void)DST_IDX;
    return true;
  }

  return false;
}

// Create a dummy old operand and insert dummy unused src2_modifiers.
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}

void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    convertTrue16OpSel(MI);
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  } else {
    // Insert dummy unused src modifiers.
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src0_modifiers);

    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src1_modifiers);
  }
}

void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  convertTrue16OpSel(MI);

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

// Given a wide register tuple \p Reg, check whether it overflows the 256
// available vector registers.
// \returns \p Reg on success or NoRegister otherwise.
static unsigned CheckVGPROverflow(unsigned Reg, const MCRegisterClass &RC,
                                  const MCRegisterInfo &MRI) {
  unsigned NumRegs = RC.getSizeInBits() / 32;
  MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
  if (!Sub0)
    return Reg;

  MCRegister BaseReg;
  if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::VGPR0;
  else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::AGPR0;

  assert(BaseReg && "Only vector registers expected");

  return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : AMDGPU::NoRegister;
}
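
// Example (illustrative): for a 4-register tuple based at v254, Sub0 - BaseReg
// + NumRegs = 254 + 4 = 258 > 256, so the tuple wraps past v255 and
// AMDGPU::NoRegister is returned; the same tuple based at v252 is accepted.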

// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as
// if it had 1 dword, which may not actually be the case.
void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
    // VIMAGE insts other than BVH never use vaddr4.
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return;
        }
        IsPartialNSA = true;
      }
    }
  }

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the register to the correct number of enabled channels.
  MCRegister NewVdata;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    // Get first subregister of VData
    MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
    MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

    const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
    NewVdata = CheckVGPROverflow(NewVdata, NewRC, MRI);
    if (!NewVdata) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return;
    }
  }

  // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
  // If using partial NSA on GFX11+, widen the last address register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  MCRegister NewVAddrSA;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
    const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
    NewVAddrSA = CheckVGPROverflow(NewVAddrSA, NewRC, MRI);
    if (!NewVAddrSA)
      return;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
}

// Opsel and neg bits are used in src_modifiers and standalone operands. The
// autogenerated decoder only adds to src_modifiers, so manually add the bits
// to the other operands.
void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);
}

// Create a dummy old operand and insert optional operands.
void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
}

void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertFMAanyK(MCInst &MI) const {
  assert(HasLiteral && "Should have decoded a literal");
  insertNamedMCOperand(MI, MCOperand::createImm(Literal), AMDGPU::OpName::immX);
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
        getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can and let the assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
                                                  bool IsHi) const {
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
  return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
}

// Decode literals for insts which always have a literal in the encoding.
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    assert(
        AMDGPU::hasVOPD(STI) &&
        "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}

MCOperand
AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
  if (HasLiteral) {
    if (Literal64 != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Literal64 = Val;
  return MCOperand::createImm(Literal64);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = Literal64 = eatBytes<uint32_t>(Bytes);
    if (ExtendFP64)
      Literal64 <<= 32;
  }
  return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
}

MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const {
  assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));

  if (!HasLiteral) {
    if (Bytes.size() < 8) {
      return errOperand(0, "cannot read literal64, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal64 = eatBytes<uint64_t>(Bytes);
  }
  return MCOperand::createImm(Literal64);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
  // Cast prevents negative overflow.
}
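
// Worked example (illustrative, using the usual encoding constants where
// INLINE_INTEGER_C_MIN = 128 and INLINE_INTEGER_C_POSITIVE_MAX = 192):
// Imm = 129 decodes to 129 - 128 = 1, while Imm = 200 decodes to
// 192 - 200 = -8.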

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint32_t>(0.5f);
  case 241:
    return llvm::bit_cast<uint32_t>(-0.5f);
  case 242:
    return llvm::bit_cast<uint32_t>(1.0f);
  case 243:
    return llvm::bit_cast<uint32_t>(-1.0f);
  case 244:
    return llvm::bit_cast<uint32_t>(2.0f);
  case 245:
    return llvm::bit_cast<uint32_t>(-2.0f);
  case 246:
    return llvm::bit_cast<uint32_t>(4.0f);
  case 247:
    return llvm::bit_cast<uint32_t>(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint64_t>(0.5);
  case 241:
    return llvm::bit_cast<uint64_t>(-0.5);
  case 242:
    return llvm::bit_cast<uint64_t>(1.0);
  case 243:
    return llvm::bit_cast<uint64_t>(-1.0);
  case 244:
    return llvm::bit_cast<uint64_t>(2.0);
  case 245:
    return llvm::bit_cast<uint64_t>(-2.0);
  case 246:
    return llvm::bit_cast<uint64_t>(4.0);
  case 247:
    return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValBF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3F00;
  case 241:
    return 0xBF00;
  case 242:
    return 0x3F80;
  case 243:
    return 0xBF80;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4080;
  case 247:
    return 0xC080;
  case 248: // 1 / (2 * PI)
    return 0x3E22;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}
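
// Cross-width check (derived from the tables above): a given inline encoding
// selects the same numeric value in every format, e.g. 242 is 1.0 in each:
// 0x3F800000 (f32), 0x3FF0000000000000 (f64), 0x3C00 (f16), 0x3F80 (bf16).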

unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return VGPR_32RegClassID;
  case 64:
    return VReg_64RegClassID;
  case 96:
    return VReg_96RegClassID;
  case 128:
    return VReg_128RegClassID;
  case 160:
    return VReg_160RegClassID;
  case 192:
    return VReg_192RegClassID;
  case 256:
    return VReg_256RegClassID;
  case 288:
    return VReg_288RegClassID;
  case 320:
    return VReg_320RegClassID;
  case 352:
    return VReg_352RegClassID;
  case 384:
    return VReg_384RegClassID;
  case 512:
    return VReg_512RegClassID;
  case 1024:
    return VReg_1024RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return AGPR_32RegClassID;
  case 64:
    return AReg_64RegClassID;
  case 96:
    return AReg_96RegClassID;
  case 128:
    return AReg_128RegClassID;
  case 160:
    return AReg_160RegClassID;
  case 256:
    return AReg_256RegClassID;
  case 288:
    return AReg_288RegClassID;
  case 320:
    return AReg_320RegClassID;
  case 352:
    return AReg_352RegClassID;
  case 384:
    return AReg_384RegClassID;
  case 512:
    return AReg_512RegClassID;
  case 1024:
    return AReg_1024RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return SGPR_32RegClassID;
  case 64:
    return SGPR_64RegClassID;
  case 96:
    return SGPR_96RegClassID;
  case 128:
    return SGPR_128RegClassID;
  case 160:
    return SGPR_160RegClassID;
  case 256:
    return SGPR_256RegClassID;
  case 288:
    return SGPR_288RegClassID;
  case 320:
    return SGPR_320RegClassID;
  case 352:
    return SGPR_352RegClassID;
  case 384:
    return SGPR_384RegClassID;
  case 512:
    return SGPR_512RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return TTMP_32RegClassID;
  case 64:
    return TTMP_64RegClassID;
  case 128:
    return TTMP_128RegClassID;
  case 256:
    return TTMP_256RegClassID;
  case 288:
    return TTMP_288RegClassID;
  case 320:
    return TTMP_320RegClassID;
  case 352:
    return TTMP_352RegClassID;
  case 384:
    return TTMP_384RegClassID;
  case 512:
    return TTMP_512RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}
1857
1858int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1859 using namespace AMDGPU::EncValues;
1860
1861 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1862 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1863
1864 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1865}
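// Example (illustrative, assuming the usual TTMP encodings of 108..123 on
// GFX9+ and 112..123 on VI): getTTmpIdx(110) is 2 (ttmp2) on GFX9+, but -1 on
// VI because the trap temporaries start at 112 there.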
1866
1867MCOperand AMDGPUDisassembler::decodeSrcOp(unsigned Width, unsigned Val) const {
1868 using namespace AMDGPU::EncValues;
1869
1870 assert(Val < 1024); // enum10
1871
1872 bool IsAGPR = Val & 512;
1873 Val &= 511;
1874
1875 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1876 return createRegOperand(IsAGPR ? getAgprClassId(Width)
1877 : getVgprClassId(Width), Val - VGPR_MIN);
1878 }
1879 return decodeNonVGPRSrcOp(Width, Val & 0xFF);
1880}
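// Example (illustrative, assuming the usual EncValues VGPR_MIN = 256): bit 9
// of the enum10 value selects the accumulator file, so Val = 256 decodes to
// v0 and Val = 512 + 256 decodes to a0, while values below 256 fall through
// to decodeNonVGPRSrcOp().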
1881
1882MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(unsigned Width,
1883 unsigned Val) const {
1884 // Cases where Val{8} is 1 (VGPR, AGPR, or true16 VGPR) should have been
1885 // decoded earlier.
1886 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1887 using namespace AMDGPU::EncValues;
1888
1889 if (Val <= SGPR_MAX) {
1890 // "SGPR_MIN <= Val" is always true and causes compilation warning.
1891 static_assert(SGPR_MIN == 0);
1892 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1893 }
1894
1895 int TTmpIdx = getTTmpIdx(Val);
1896 if (TTmpIdx >= 0) {
1897 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1898 }
1899
1900 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
1901 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
1902 Val == LITERAL_CONST)
1903 return MCOperand::createImm(Val);
1904
1905 if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
1906 return decodeLiteral64Constant();
1907 }
1908
1909 switch (Width) {
1910 case 32:
1911 case 16:
1912 return decodeSpecialReg32(Val);
1913 case 64:
1914 return decodeSpecialReg64(Val);
1915 case 96:
1916 case 128:
1917 case 256:
1918 case 512:
1919 return decodeSpecialReg96Plus(Val);
1920 default:
1921 llvm_unreachable("unexpected immediate type");
1922 }
1923}
1924
1925// Bit 0 of DstY isn't stored in the instruction, because it's always the
1926// opposite of bit 0 of DstX.
1927MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
1928 unsigned Val) const {
1929 int VDstXInd =
1930 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1931 assert(VDstXInd != -1);
1932 assert(Inst.getOperand(VDstXInd).isReg());
1933 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
1934 Val |= ~XDstReg & 1;
1935 return createRegOperand(getVgprClassId(32), Val);
1936}
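// Example (illustrative): if vdstX decoded to an even VGPR such as v2
// (encoding value 2), the reconstructed bit forces vdstY to an odd VGPR, and
// vice versa, matching the parity rule in the comment above.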
1937
1938MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
1939 using namespace AMDGPU;
1940
1941 switch (Val) {
1942 // clang-format off
1943 case 102: return createRegOperand(FLAT_SCR_LO);
1944 case 103: return createRegOperand(FLAT_SCR_HI);
1945 case 104: return createRegOperand(XNACK_MASK_LO);
1946 case 105: return createRegOperand(XNACK_MASK_HI);
1947 case 106: return createRegOperand(VCC_LO);
1948 case 107: return createRegOperand(VCC_HI);
1949 case 108: return createRegOperand(TBA_LO);
1950 case 109: return createRegOperand(TBA_HI);
1951 case 110: return createRegOperand(TMA_LO);
1952 case 111: return createRegOperand(TMA_HI);
1953 case 124:
1954 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
1955 case 125:
1956 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
1957 case 126: return createRegOperand(EXEC_LO);
1958 case 127: return createRegOperand(EXEC_HI);
1959 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
1960 case 231: return createRegOperand(SRC_FLAT_SCRATCH_BASE_HI);
1961 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
1962 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
1963 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
1964 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
1965 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1966 case 251: return createRegOperand(SRC_VCCZ);
1967 case 252: return createRegOperand(SRC_EXECZ);
1968 case 253: return createRegOperand(SRC_SCC);
1969 case 254: return createRegOperand(LDS_DIRECT);
1970 default: break;
1971 // clang-format on
1972 }
1973 return errOperand(Val, "unknown operand encoding " + Twine(Val));
1974}
1975
1976MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
1977 using namespace AMDGPU;
1978
1979 switch (Val) {
1980 case 102: return createRegOperand(FLAT_SCR);
1981 case 104: return createRegOperand(XNACK_MASK);
1982 case 106: return createRegOperand(VCC);
1983 case 108: return createRegOperand(TBA);
1984 case 110: return createRegOperand(TMA);
1985 case 124:
1986 if (isGFX11Plus())
1987 return createRegOperand(SGPR_NULL);
1988 break;
1989 case 125:
1990 if (!isGFX11Plus())
1991 return createRegOperand(SGPR_NULL);
1992 break;
1993 case 126: return createRegOperand(EXEC);
1994 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
1995 case 235: return createRegOperand(SRC_SHARED_BASE);
1996 case 236: return createRegOperand(SRC_SHARED_LIMIT);
1997 case 237: return createRegOperand(SRC_PRIVATE_BASE);
1998 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
1999 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2000 case 251: return createRegOperand(SRC_VCCZ);
2001 case 252: return createRegOperand(SRC_EXECZ);
2002 case 253: return createRegOperand(SRC_SCC);
2003 default: break;
2004 }
2005 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2006}
2007
2008MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
2009 using namespace AMDGPU;
2010
2011 switch (Val) {
2012 case 124:
2013 if (isGFX11Plus())
2014 return createRegOperand(SGPR_NULL);
2015 break;
2016 case 125:
2017 if (!isGFX11Plus())
2018 return createRegOperand(SGPR_NULL);
2019 break;
2020 default:
2021 break;
2022 }
2023 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2024}
2025
2026MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
2027 const unsigned Val) const {
2028 using namespace AMDGPU::SDWA;
2029 using namespace AMDGPU::EncValues;
2030
2031 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
2032 STI.hasFeature(AMDGPU::FeatureGFX10)) {
2033 // XXX: The cast to int avoids a spurious "comparison with unsigned is
2034 // always true" warning.
2035 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
2036 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2037 return createRegOperand(getVgprClassId(Width),
2038 Val - SDWA9EncValues::SRC_VGPR_MIN);
2039 }
2040 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2041 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2042 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2043 return createSRegOperand(getSgprClassId(Width),
2044 Val - SDWA9EncValues::SRC_SGPR_MIN);
2045 }
2046 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2047 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2048 return createSRegOperand(getTtmpClassId(Width),
2049 Val - SDWA9EncValues::SRC_TTMP_MIN);
2050 }
2051
2052 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2053
2054 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2055 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2056 return MCOperand::createImm(SVal);
2057
2058 return decodeSpecialReg32(SVal);
2059 }
2060 if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
2061 return createRegOperand(getVgprClassId(Width), Val);
2062 llvm_unreachable("unsupported target");
2063}
2064
2065MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
2066 return decodeSDWASrc(16, Val);
2067}
2068
2069MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
2070 return decodeSDWASrc(32, Val);
2071}
2072
2073MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
2074 using namespace AMDGPU::SDWA;
2075
2076 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
2077 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2078 "SDWAVopcDst should be present only on GFX9+");
2079
2080 bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
2081
2082 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
2083 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2084
2085 int TTmpIdx = getTTmpIdx(Val);
2086 if (TTmpIdx >= 0) {
2087 auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
2088 return createSRegOperand(TTmpClsId, TTmpIdx);
2089 }
2090 if (Val > SGPR_MAX) {
2091 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
2092 }
2093 return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
2094 }
2095 return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
2096}
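// Illustration (illustrative values): when the VOPC_DST_VCC_MASK bit is
// clear, the destination is the implicit VCC (VCC_LO in wave32); when it is
// set, the masked value selects an explicit destination, e.g. a masked value
// of 4 yields s4 in wave32 or the pair s[4:5] in wave64.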
2097
2098MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
2099 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32) ? decodeSrcOp(32, Val)
2100 : decodeSrcOp(64, Val);
2101}
2102
2103MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
2104 return decodeSrcOp(32, Val);
2105}
2106
2107MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
2108 if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
2109 return MCOperand();
2110 return MCOperand::createImm(Val);
2111}
2112
2113MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
2114 using VersionField = AMDGPU::EncodingField<7, 0>;
2115 using W64Bit = AMDGPU::EncodingBit<13>;
2116 using W32Bit = AMDGPU::EncodingBit<14>;
2117 using MDPBit = AMDGPU::EncodingBit<15>;
2118 using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;
2119
2120 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
2121
2122 // Decode into a plain immediate if any unused bits are set.
2123 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
2124 return MCOperand::createImm(Imm);
2125
2126 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
2127 const auto *I = find_if(
2128 Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
2129 return V.Code == Version;
2130 });
2131 MCContext &Ctx = getContext();
2132 const MCExpr *E;
2133 if (I == Versions.end())
2134 E = MCConstantExpr::create(Version, Ctx);
2135 else
2136 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
2137
2138 if (W64)
2139 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
2140 if (W32)
2141 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
2142 if (MDP)
2143 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
2144
2145 return MCOperand::createExpr(E);
2146}
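// Illustration (not part of the upstream source): with the field layout
// above, bits [7:0] carry the microcode version code and bits 13/14/15 the
// W64/W32/MDP flags, so an immediate of (0x4000 | Code) for a known Code
// prints as the matching version symbol OR'd with UC_VERSION_W32_BIT.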
2147
2148bool AMDGPUDisassembler::isVI() const {
2149 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2150}
2151
2152bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
2153
2154bool AMDGPUDisassembler::isGFX90A() const {
2155 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2156}
2157
2158bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
2159
2160bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
2161
2162bool AMDGPUDisassembler::isGFX10Plus() const {
2163 return AMDGPU::isGFX10Plus(STI);
2164}
2165
2166bool AMDGPUDisassembler::isGFX11() const {
2167 return STI.hasFeature(AMDGPU::FeatureGFX11);
2168}
2169
2170bool AMDGPUDisassembler::isGFX11Plus() const {
2171 return AMDGPU::isGFX11Plus(STI);
2172}
2173
2174bool AMDGPUDisassembler::isGFX12() const {
2175 return STI.hasFeature(AMDGPU::FeatureGFX12);
2176}
2177
2178bool AMDGPUDisassembler::isGFX12Plus() const {
2179 return AMDGPU::isGFX12Plus(STI);
2180}
2181
2182bool AMDGPUDisassembler::isGFX1250() const { return AMDGPU::isGFX1250(STI); }
2183
2184bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
2185 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2186}
2187
2188bool AMDGPUDisassembler::hasKernargPreload() const {
2189 return AMDGPU::hasKernargPreload(STI);
2190}
2191
2192//===----------------------------------------------------------------------===//
2193// AMDGPU specific symbol handling
2194//===----------------------------------------------------------------------===//
2195
2196/// Build a string describing the reserved bit range specified by Mask with
2197/// offset BaseBytes for use in error comments. Mask is a single continuous
2198/// range of 1s surrounded by zeros. The format here is meant to align with the
2199/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2200static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2201 SmallString<32> Result;
2202 raw_svector_ostream S(Result);
2203
2204 int TrailingZeros = llvm::countr_zero(Mask);
2205 int PopCount = llvm::popcount(Mask);
2206
2207 if (PopCount == 1) {
2208 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2209 } else {
2210 S << "bits in range ("
2211 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2212 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2213 }
2214
2215 return Result;
2216}
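// Examples (illustrative): getBitRangeFromMask(0x00000600, 0) returns
// "bits in range (10:9)", while getBitRangeFromMask(0x1, 4) returns
// "bit (32)" because the 4-byte base offset contributes 32 bits.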
2217
2218#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2219#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2220 do { \
2221 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2222 } while (0)
2223#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2224 do { \
2225 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2226 << GET_FIELD(MASK) << '\n'; \
2227 } while (0)
2228
2229#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2230 do { \
2231 if (FourByteBuffer & (MASK)) { \
2232 return createStringError(std::errc::invalid_argument, \
2233 "kernel descriptor " DESC \
2234 " reserved %s set" MSG, \
2235 getBitRangeFromMask((MASK), 0).c_str()); \
2236 } \
2237 } while (0)
2238
2239#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2240#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2241 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2242#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2243 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2244#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2245 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
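// Illustration (not part of the upstream source): a use such as
// CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY) expands to roughly
//   if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY)
//     return createStringError(std::errc::invalid_argument,
//                              "kernel descriptor COMPUTE_PGM_RSRC1_PRIORITY"
//                              " reserved %s set",
//                              getBitRangeFromMask(COMPUTE_PGM_RSRC1_PRIORITY,
//                                                  0).c_str());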
2246
2247// NOLINTNEXTLINE(readability-identifier-naming)
2248Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
2249 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2250 using namespace amdhsa;
2251 StringRef Indent = "\t";
2252
2253 // We cannot accurately backward compute #VGPRs used from
2254 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2255 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2256 // simply calculate the inverse of what the assembler does.
2257
2258 uint32_t GranulatedWorkitemVGPRCount =
2259 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2260
2261 uint32_t NextFreeVGPR =
2262 (GranulatedWorkitemVGPRCount + 1) *
2263 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
2264
2265 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2266
2267 // We cannot backward compute values used to calculate
2268 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for the
2269 // following directives can't be computed:
2270 // .amdhsa_reserve_vcc
2271 // .amdhsa_reserve_flat_scratch
2272 // .amdhsa_reserve_xnack_mask
2273 // They take their respective default values if not specified in the assembly.
2274 //
2275 // GRANULATED_WAVEFRONT_SGPR_COUNT
2276 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2277 //
2278 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2279 // are set to 0. So while disassembling we consider that:
2280 //
2281 // GRANULATED_WAVEFRONT_SGPR_COUNT
2282 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2283 //
2284 // The disassembler cannot recover the original values of those 3 directives.
2285
2286 uint32_t GranulatedWavefrontSGPRCount =
2287 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2288
2289 if (isGFX10Plus())
2290 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2291 "must be zero on gfx10+");
2292
2293 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2294 AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
2295
2296 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2297 if (!hasArchitectedFlatScratch())
2298 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2299 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
2300 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2301
2302 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2303
2304 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2305 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2306 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2307 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2308 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2309 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2310 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2311 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2312
2313 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2314
2315 if (!isGFX12Plus())
2316 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2317 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2318
2319 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2320
2321 if (!isGFX12Plus())
2322 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2323 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2324
2325 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2326 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2327
2328 // Bits [26].
2329 if (isGFX9Plus()) {
2330 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2331 } else {
2332 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2333 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2334 }
2335
2336 // Bits [27].
2337 if (isGFX1250()) {
2338 PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
2339 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2340 } else {
2341 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
2342 "COMPUTE_PGM_RSRC1");
2343 }
2344
2345 // Bits [28].
2346 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
2347
2348 // Bits [29-31].
2349 if (isGFX10Plus()) {
2350 // WGP_MODE is not available on GFX1250.
2351 if (!isGFX1250()) {
2352 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2353 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2354 }
2355 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2356 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2357 } else {
2358 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
2359 "COMPUTE_PGM_RSRC1");
2360 }
2361
2362 if (isGFX12Plus())
2363 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2364 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2365
2366 return true;
2367}
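// Illustration (not part of the upstream source): assuming a VGPR encoding
// granule of 8 (e.g. a wave32 gfx10 target), GRANULATED_WORKITEM_VGPR_COUNT
// = 3 above is printed as .amdhsa_next_free_vgpr 32, i.e. (3 + 1) * 8; the
// SGPR direction works the same way, so with an SGPR encoding granule of 8,
// GRANULATED_WAVEFRONT_SGPR_COUNT = 5 prints .amdhsa_next_free_sgpr 48. The
// real granules come from AMDGPU::IsaInfo, not from these example numbers.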
2368
2369// NOLINTNEXTLINE(readability-identifier-naming)
2370Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
2371 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2372 using namespace amdhsa;
2373 StringRef Indent = "\t";
2374 if (hasArchitectedFlatScratch())
2375 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2376 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2377 else
2378 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2379 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2380 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2381 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2382 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2383 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2384 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2385 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2386 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2387 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2388 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2389 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2390
2391 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2392 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2393 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2394
2396 ".amdhsa_exception_fp_ieee_invalid_op",
2397 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2398 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2399 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2401 ".amdhsa_exception_fp_ieee_div_zero",
2402 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2403 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2404 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2405 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2406 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2407 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2408 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2409 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2410 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2411
2412 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2413
2414 return true;
2415}
2416
2417// NOLINTNEXTLINE(readability-identifier-naming)
2418Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2419 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2420 using namespace amdhsa;
2421 StringRef Indent = "\t";
2422 if (isGFX90A()) {
2423 KdStream << Indent << ".amdhsa_accum_offset "
2424 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2425 << '\n';
2426
2427 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2428
2429 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2430 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2431 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2432 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2433 } else if (isGFX10Plus()) {
2434 // Bits [0-3].
2435 if (!isGFX12Plus()) {
2436 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2437 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2438 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2439 } else {
2441 "SHARED_VGPR_COUNT",
2442 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2443 }
2444 } else {
2445 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2446 "COMPUTE_PGM_RSRC3",
2447 "must be zero on gfx12+");
2448 }
2449
2450 // Bits [4-11].
2451 if (isGFX11()) {
2452 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2453 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2454 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2455 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2456 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2457 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2458 } else if (isGFX12Plus()) {
2459 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2460 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2461 } else {
2462 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2463 "COMPUTE_PGM_RSRC3",
2464 "must be zero on gfx10");
2465 }
2466
2467 // Bits [12].
2468 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2469 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2470
2471 // Bits [13].
2472 if (isGFX12Plus()) {
2473 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2474 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2475 } else {
2476 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2477 "COMPUTE_PGM_RSRC3",
2478 "must be zero on gfx10 or gfx11");
2479 }
2480
2481 // Bits [14-21].
2482 if (isGFX1250()) {
2483 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2484 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2486 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2487 PRINT_PSEUDO_DIRECTIVE_COMMENT("TCP_SPLIT",
2488 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2490 "ENABLE_DIDT_THROTTLE",
2491 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2492 } else {
2493 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2494 "COMPUTE_PGM_RSRC3",
2495 "must be zero on gfx10+");
2496 }
2497
2498 // Bits [22-30].
2499 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2500 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2501
2502 // Bits [31].
2503 if (isGFX11Plus()) {
2504 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2505 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2506 } else {
2507 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2508 "COMPUTE_PGM_RSRC3",
2509 "must be zero on gfx10");
2510 }
2511 } else if (FourByteBuffer) {
2512 return createStringError(
2513 std::errc::invalid_argument,
2514 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2515 }
2516 return true;
2517}
2518#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2519#undef PRINT_DIRECTIVE
2520#undef GET_FIELD
2521#undef CHECK_RESERVED_BITS_IMPL
2522#undef CHECK_RESERVED_BITS
2523#undef CHECK_RESERVED_BITS_MSG
2524#undef CHECK_RESERVED_BITS_DESC
2525#undef CHECK_RESERVED_BITS_DESC_MSG
2526
2527/// Create an error object to return from onSymbolStart for reserved kernel
2528/// descriptor bits being set.
2529static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2530 const char *Msg = "") {
2531 return createStringError(
2532 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2533 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2534}
2535
2536/// Create an error object to return from onSymbolStart for reserved kernel
2537/// descriptor bytes being set.
2538static Error createReservedKDBytesError(unsigned BaseInBytes,
2539 unsigned WidthInBytes) {
2540 // Create an error comment in the same format as the "Kernel Descriptor"
2541 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2542 return createStringError(
2543 std::errc::invalid_argument,
2544 "kernel descriptor reserved bits in range (%u:%u) set",
2545 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2546}
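// Example (illustrative): createReservedKDBytesError(40, 4) yields "kernel
// descriptor reserved bits in range (351:320) set", i.e. reserved bytes
// 40..43 expressed as a bit range in the AMDGPUUsage table format.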
2547
2548Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2549 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2550 raw_string_ostream &KdStream) const {
2551#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2552 do { \
2553 KdStream << Indent << DIRECTIVE " " \
2554 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2555 } while (0)
2556
2557 uint16_t TwoByteBuffer = 0;
2558 uint32_t FourByteBuffer = 0;
2559
2560 StringRef ReservedBytes;
2561 StringRef Indent = "\t";
2562
2563 assert(Bytes.size() == 64);
2564 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2565
2566 switch (Cursor.tell()) {
2567 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2568 FourByteBuffer = DE.getU32(Cursor);
2569 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2570 << '\n';
2571 return true;
2572
2573 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2574 FourByteBuffer = DE.getU32(Cursor);
2575 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2576 << FourByteBuffer << '\n';
2577 return true;
2578
2579 case amdhsa::KERNARG_SIZE_OFFSET:
2580 FourByteBuffer = DE.getU32(Cursor);
2581 KdStream << Indent << ".amdhsa_kernarg_size "
2582 << FourByteBuffer << '\n';
2583 return true;
2584
2585 case amdhsa::RESERVED0_OFFSET:
2586 // 4 reserved bytes, must be 0.
2587 ReservedBytes = DE.getBytes(Cursor, 4);
2588 for (int I = 0; I < 4; ++I) {
2589 if (ReservedBytes[I] != 0)
2590 return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
2591 }
2592 return true;
2593
2594 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2595 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2596 // So far no directive controls this for Code Object V3, so simply skip for
2597 // disassembly.
2598 DE.skip(Cursor, 8);
2599 return true;
2600
2601 case amdhsa::RESERVED1_OFFSET:
2602 // 20 reserved bytes, must be 0.
2603 ReservedBytes = DE.getBytes(Cursor, 20);
2604 for (int I = 0; I < 20; ++I) {
2605 if (ReservedBytes[I] != 0)
2606 return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
2607 }
2608 return true;
2609
2610 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2611 FourByteBuffer = DE.getU32(Cursor);
2612 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2613
2614 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2615 FourByteBuffer = DE.getU32(Cursor);
2616 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2617
2618 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2619 FourByteBuffer = DE.getU32(Cursor);
2620 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2621
2622 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2623 using namespace amdhsa;
2624 TwoByteBuffer = DE.getU16(Cursor);
2625
2626 if (!hasArchitectedFlatScratch())
2627 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2628 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2629 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2630 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2631 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2632 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2633 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2634 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2635 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2636 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2637 if (!hasArchitectedFlatScratch())
2638 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2639 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2640 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2641 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2642 if (isGFX1250())
2643 PRINT_DIRECTIVE(".amdhsa_uses_cu_stores",
2644 KERNEL_CODE_PROPERTY_USES_CU_STORES);
2645
2646 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2647 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2648 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2649
2650 // Reserved for GFX9
2651 if (isGFX9() &&
2652 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2653 return createReservedKDBitsError(
2654 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2655 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2656 }
2657 if (isGFX10Plus()) {
2658 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2659 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2660 }
2661
2662 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2663 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2664 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2665
2666 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2667 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2668 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2669 }
2670
2671 return true;
2672
2673 case amdhsa::KERNARG_PRELOAD_OFFSET:
2674 using namespace amdhsa;
2675 TwoByteBuffer = DE.getU16(Cursor);
2676 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2677 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2678 KERNARG_PRELOAD_SPEC_LENGTH);
2679 }
2680
2681 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2682 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2683 KERNARG_PRELOAD_SPEC_OFFSET);
2684 }
2685 return true;
2686
2687 case amdhsa::RESERVED3_OFFSET:
2688 // 4 bytes from here are reserved, must be 0.
2689 ReservedBytes = DE.getBytes(Cursor, 4);
2690 for (int I = 0; I < 4; ++I) {
2691 if (ReservedBytes[I] != 0)
2692 return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
2693 }
2694 return true;
2695
2696 default:
2697 llvm_unreachable("Unhandled index. Case statements cover everything.");
2698 return true;
2699 }
2700#undef PRINT_DIRECTIVE
2701}
2702
2703Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2704 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2705
2706 // CP microcode requires the kernel descriptor to be 64-byte aligned.
2707 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2708 return createStringError(std::errc::invalid_argument,
2709 "kernel descriptor must be 64-byte aligned");
2710
2711 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2712 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2713 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2714 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2715 // when required.
2716 if (isGFX10Plus()) {
2717 uint16_t KernelCodeProperties =
2718 support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2719 llvm::endianness::little);
2720 EnableWavefrontSize32 =
2721 AMDHSA_BITS_GET(KernelCodeProperties,
2722 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2723 }
2724
2725 std::string Kd;
2726 raw_string_ostream KdStream(Kd);
2727 KdStream << ".amdhsa_kernel " << KdName << '\n';
2728
2729 DataExtractor::Cursor C(0);
2730 while (C && C.tell() < Bytes.size()) {
2731 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2732
2733 cantFail(C.takeError());
2734
2735 if (!Res)
2736 return Res;
2737 }
2738 KdStream << ".end_amdhsa_kernel\n";
2739 outs() << KdStream.str();
2740 return true;
2741}
2742
2743Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2744 uint64_t &Size,
2745 ArrayRef<uint8_t> Bytes,
2746 uint64_t Address) const {
2747 // Right now only kernel descriptor needs to be handled.
2748 // We ignore all other symbols for target specific handling.
2749 // TODO:
2750 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2751 // Object V2 and V3 when symbols are marked protected.
2752
2753 // amd_kernel_code_t for Code Object V2.
2754 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2755 Size = 256;
2756 return createStringError(std::errc::invalid_argument,
2757 "code object v2 is not supported");
2758 }
2759
2760 // Code Object V3 kernel descriptors.
2761 StringRef Name = Symbol.Name;
2762 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2763 Size = 64; // Size = 64 regardless of success or failure.
2764 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2765 }
2766
2767 return false;
2768}
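// Usage sketch (illustrative): for an ELF symbol such as "my_kernel.kd" of
// type STT_OBJECT, this hook fixes Size to 64 and emits the reconstructed
// ".amdhsa_kernel my_kernel ... .end_amdhsa_kernel" block via
// decodeKernelDescriptor(); ordinary code symbols return false and are
// disassembled as instructions.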
2769
2770const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2771 int64_t Val) {
2772 MCContext &Ctx = getContext();
2773 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2774 // Note: only set the value to Val on a new symbol, in case a disassembler
2775 // has already been initialized in this context.
2776 if (!Sym->isVariable()) {
2777 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2778 } else {
2779 int64_t Res = ~Val;
2780 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2781 if (!Valid || Res != Val)
2782 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2783 }
2784 return MCSymbolRefExpr::create(Sym, Ctx);
2785}
2786
2787bool AMDGPUDisassembler::isBufferInstruction(const MCInst &MI) const {
2788 const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
2789
2790 // Check for MUBUF and MTBUF instructions
2791 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2792 return true;
2793
2794 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2795 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(MI.getOpcode()))
2796 return true;
2797
2798 return false;
2799}
2800
2801//===----------------------------------------------------------------------===//
2802// AMDGPUSymbolizer
2803//===----------------------------------------------------------------------===//
2804
2805// Try to find a symbol name for the specified label.
2806bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2807 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2808 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2809 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2810
2811 if (!IsBranch) {
2812 return false;
2813 }
2814
2815 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2816 if (!Symbols)
2817 return false;
2818
2819 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2820 return Val.Addr == static_cast<uint64_t>(Value) &&
2821 Val.Type == ELF::STT_NOTYPE;
2822 });
2823 if (Result != Symbols->end()) {
2824 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2825 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2826 Inst.addOperand(MCOperand::createExpr(Add));
2827 return true;
2828 }
2829 // Add to list of referenced addresses, so caller can synthesize a label.
2830 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2831 return false;
2832}
2833
2834void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2835 int64_t Value,
2836 uint64_t Address) {
2837 llvm_unreachable("unimplemented");
2838}
2839
2840//===----------------------------------------------------------------------===//
2841// Initialization
2842//===----------------------------------------------------------------------===//
2843
2844static MCSymbolizer *createAMDGPUSymbolizer(const Triple & /*TT*/,
2845 LLVMOpInfoCallback /*GetOpInfo*/,
2846 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2847 void *DisInfo,
2848 MCContext *Ctx,
2849 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2850 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2851}
2852
2853static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2854 const MCSubtargetInfo &STI,
2855 MCContext &Ctx) {
2856 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2857}
2858
2859extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
2860LLVMInitializeAMDGPUDisassembler() {
2861 // Register the disassembler and the symbolizer for the GCN target.
2862 TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2863 createAMDGPUDisassembler);
2864 TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2865 createAMDGPUSymbolizer);
2866}