1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
56enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
62class AMDGPUOperand : public MCParsedAsmOperand {
63 enum KindTy {
64 Token,
65 Immediate,
66 Register,
67 Expression
68 } Kind;
69
70 SMLoc StartLoc, EndLoc;
71 const AMDGPUAsmParser *AsmParser;
72
73public:
74 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
75 : Kind(Kind_), AsmParser(AsmParser_) {}
76
77 using Ptr = std::unique_ptr<AMDGPUOperand>;
78
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 bool Lit = false;
84 bool Lit64 = false;
85
86 bool hasFPModifiers() const { return Abs || Neg; }
87 bool hasIntModifiers() const { return Sext; }
88 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
89
90 int64_t getFPModifiersOperand() const {
91 int64_t Operand = 0;
92 Operand |= Abs ? SISrcMods::ABS : 0u;
93 Operand |= Neg ? SISrcMods::NEG : 0u;
94 return Operand;
95 }
96
97 int64_t getIntModifiersOperand() const {
98 int64_t Operand = 0;
99 Operand |= Sext ? SISrcMods::SEXT : 0u;
100 return Operand;
101 }
102
103 int64_t getModifiersOperand() const {
104 assert(!(hasFPModifiers() && hasIntModifiers())
105 && "fp and int modifiers should not be used simultaneously");
106 if (hasFPModifiers())
107 return getFPModifiersOperand();
108 if (hasIntModifiers())
109 return getIntModifiersOperand();
110 return 0;
111 }
112
113 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
114 };
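// Illustrative note (not part of the original listing): for a source operand
// written as "-|v0|", the parser sets both Abs and Neg, so
// getModifiersOperand() yields SISrcMods::NEG | SISrcMods::ABS, which is the
// value emitted into the corresponding src*_modifiers operand of the MCInst.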
115
116 enum ImmTy {
117 ImmTyNone,
118 ImmTyGDS,
119 ImmTyLDS,
120 ImmTyOffen,
121 ImmTyIdxen,
122 ImmTyAddr64,
123 ImmTyOffset,
124 ImmTyInstOffset,
125 ImmTyOffset0,
126 ImmTyOffset1,
127 ImmTySMEMOffsetMod,
128 ImmTyCPol,
129 ImmTyTFE,
130 ImmTyD16,
131 ImmTyClamp,
132 ImmTyOModSI,
133 ImmTySDWADstSel,
134 ImmTySDWASrc0Sel,
135 ImmTySDWASrc1Sel,
136 ImmTySDWADstUnused,
137 ImmTyDMask,
138 ImmTyDim,
139 ImmTyUNorm,
140 ImmTyDA,
141 ImmTyR128A16,
142 ImmTyA16,
143 ImmTyLWE,
144 ImmTyExpTgt,
145 ImmTyExpCompr,
146 ImmTyExpVM,
147 ImmTyFORMAT,
148 ImmTyHwreg,
149 ImmTyOff,
150 ImmTySendMsg,
151 ImmTyInterpSlot,
152 ImmTyInterpAttr,
153 ImmTyInterpAttrChan,
154 ImmTyOpSel,
155 ImmTyOpSelHi,
156 ImmTyNegLo,
157 ImmTyNegHi,
158 ImmTyIndexKey8bit,
159 ImmTyIndexKey16bit,
160 ImmTyIndexKey32bit,
161 ImmTyDPP8,
162 ImmTyDppCtrl,
163 ImmTyDppRowMask,
164 ImmTyDppBankMask,
165 ImmTyDppBoundCtrl,
166 ImmTyDppFI,
167 ImmTySwizzle,
168 ImmTyGprIdxMode,
169 ImmTyHigh,
170 ImmTyBLGP,
171 ImmTyCBSZ,
172 ImmTyABID,
173 ImmTyEndpgm,
174 ImmTyWaitVDST,
175 ImmTyWaitEXP,
176 ImmTyWaitVAVDst,
177 ImmTyWaitVMVSrc,
178 ImmTyBitOp3,
179 ImmTyMatrixAFMT,
180 ImmTyMatrixBFMT,
181 ImmTyMatrixAScale,
182 ImmTyMatrixBScale,
183 ImmTyMatrixAScaleFmt,
184 ImmTyMatrixBScaleFmt,
185 ImmTyMatrixAReuse,
186 ImmTyMatrixBReuse,
187 ImmTyScaleSel,
188 ImmTyByteSel,
189 };
190
191 // Immediate operand kind.
192 // It helps to identify the location of an offending operand after an error.
193 // Note that regular literals and mandatory literals (KImm) must be handled
194 // differently. When looking for an offending operand, we should usually
195 // ignore mandatory literals because they are part of the instruction and
196 // cannot be changed. Report location of mandatory operands only for VOPD,
197 // when both OpX and OpY have a KImm and there are no other literals.
198 enum ImmKindTy {
199 ImmKindTyNone,
200 ImmKindTyLiteral,
201 ImmKindTyMandatoryLiteral,
202 ImmKindTyConst,
203 };
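// Illustrative example (assumption, not from the listing): the trailing
// constant of v_fmaak_f32 is a mandatory literal (KImm) and is normally
// skipped when locating an offending operand, whereas an ordinary 32-bit
// literal such as the 1.5 in "v_add_f32 v0, 1.5, v1" (not an inline
// constant) is tracked as ImmKindTyLiteral.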
204
205private:
206 struct TokOp {
207 const char *Data;
208 unsigned Length;
209 };
210
211 struct ImmOp {
212 int64_t Val;
213 ImmTy Type;
214 bool IsFPImm;
215 mutable ImmKindTy Kind;
216 Modifiers Mods;
217 };
218
219 struct RegOp {
220 MCRegister RegNo;
221 Modifiers Mods;
222 };
223
224 union {
225 TokOp Tok;
226 ImmOp Imm;
227 RegOp Reg;
228 const MCExpr *Expr;
229 };
230
231public:
232 bool isToken() const override { return Kind == Token; }
233
234 bool isSymbolRefExpr() const {
235 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
236 }
237
238 bool isImm() const override {
239 return Kind == Immediate;
240 }
241
242 void setImmKindNone() const {
243 assert(isImm());
244 Imm.Kind = ImmKindTyNone;
245 }
246
247 void setImmKindLiteral() const {
248 assert(isImm());
249 Imm.Kind = ImmKindTyLiteral;
250 }
251
252 void setImmKindMandatoryLiteral() const {
253 assert(isImm());
254 Imm.Kind = ImmKindTyMandatoryLiteral;
255 }
256
257 void setImmKindConst() const {
258 assert(isImm());
259 Imm.Kind = ImmKindTyConst;
260 }
261
262 bool IsImmKindLiteral() const {
263 return isImm() && Imm.Kind == ImmKindTyLiteral;
264 }
265
266 bool IsImmKindMandatoryLiteral() const {
267 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
268 }
269
270 bool isImmKindConst() const {
271 return isImm() && Imm.Kind == ImmKindTyConst;
272 }
273
274 bool isInlinableImm(MVT type) const;
275 bool isLiteralImm(MVT type) const;
276
277 bool isRegKind() const {
278 return Kind == Register;
279 }
280
281 bool isReg() const override {
282 return isRegKind() && !hasModifiers();
283 }
284
285 bool isRegOrInline(unsigned RCID, MVT type) const {
286 return isRegClass(RCID) || isInlinableImm(type);
287 }
288
289 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
290 return isRegOrInline(RCID, type) || isLiteralImm(type);
291 }
292
293 bool isRegOrImmWithInt16InputMods() const {
294 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
295 }
296
 297 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
 298 return isRegOrImmWithInputMods(
 299 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
 300 }
301
302 bool isRegOrImmWithInt32InputMods() const {
303 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
304 }
305
306 bool isRegOrInlineImmWithInt16InputMods() const {
307 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
308 }
309
310 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
311 return isRegOrInline(
312 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
313 }
314
315 bool isRegOrInlineImmWithInt32InputMods() const {
316 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
317 }
318
319 bool isRegOrImmWithInt64InputMods() const {
320 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
321 }
322
323 bool isRegOrImmWithFP16InputMods() const {
324 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
325 }
326
 327 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
 328 return isRegOrImmWithInputMods(
 329 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
 330 }
331
332 bool isRegOrImmWithFP32InputMods() const {
333 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
334 }
335
336 bool isRegOrImmWithFP64InputMods() const {
337 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
338 }
339
340 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
341 return isRegOrInline(
342 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
343 }
344
345 bool isRegOrInlineImmWithFP32InputMods() const {
346 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
347 }
348
349 bool isRegOrInlineImmWithFP64InputMods() const {
350 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
351 }
352
353 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
354
355 bool isVRegWithFP32InputMods() const {
356 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
357 }
358
359 bool isVRegWithFP64InputMods() const {
360 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
361 }
362
363 bool isPackedFP16InputMods() const {
364 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
365 }
366
367 bool isPackedVGPRFP32InputMods() const {
368 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
369 }
370
371 bool isVReg() const {
372 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
373 isRegClass(AMDGPU::VReg_64RegClassID) ||
374 isRegClass(AMDGPU::VReg_96RegClassID) ||
375 isRegClass(AMDGPU::VReg_128RegClassID) ||
376 isRegClass(AMDGPU::VReg_160RegClassID) ||
377 isRegClass(AMDGPU::VReg_192RegClassID) ||
378 isRegClass(AMDGPU::VReg_256RegClassID) ||
379 isRegClass(AMDGPU::VReg_512RegClassID) ||
380 isRegClass(AMDGPU::VReg_1024RegClassID);
381 }
382
383 bool isVReg32() const {
384 return isRegClass(AMDGPU::VGPR_32RegClassID);
385 }
386
387 bool isVReg32OrOff() const {
388 return isOff() || isVReg32();
389 }
390
391 bool isNull() const {
392 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
393 }
394
395 bool isVRegWithInputMods() const;
396 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
397 template <bool IsFake16> bool isT16VRegWithInputMods() const;
398
399 bool isSDWAOperand(MVT type) const;
400 bool isSDWAFP16Operand() const;
401 bool isSDWAFP32Operand() const;
402 bool isSDWAInt16Operand() const;
403 bool isSDWAInt32Operand() const;
404
405 bool isImmTy(ImmTy ImmT) const {
406 return isImm() && Imm.Type == ImmT;
407 }
408
409 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
410
411 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
412
413 bool isImmModifier() const {
414 return isImm() && Imm.Type != ImmTyNone;
415 }
416
417 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
418 bool isDim() const { return isImmTy(ImmTyDim); }
419 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
420 bool isOff() const { return isImmTy(ImmTyOff); }
421 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
422 bool isOffen() const { return isImmTy(ImmTyOffen); }
423 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
424 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
425 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
426 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
427 bool isGDS() const { return isImmTy(ImmTyGDS); }
428 bool isLDS() const { return isImmTy(ImmTyLDS); }
429 bool isCPol() const { return isImmTy(ImmTyCPol); }
430 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
431 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
432 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
433 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
434 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
435 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
436 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
437 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
438 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
439 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
440 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
441 bool isTFE() const { return isImmTy(ImmTyTFE); }
442 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
443 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
444 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
445 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
446 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
447 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
448 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
449 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
450 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
451 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
452 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
453 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
454 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
455 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
456
457 bool isRegOrImm() const {
458 return isReg() || isImm();
459 }
460
461 bool isRegClass(unsigned RCID) const;
462
463 bool isInlineValue() const;
464
465 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
466 return isRegOrInline(RCID, type) && !hasModifiers();
467 }
468
469 bool isSCSrcB16() const {
470 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
471 }
472
473 bool isSCSrcV2B16() const {
474 return isSCSrcB16();
475 }
476
477 bool isSCSrc_b32() const {
478 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
479 }
480
481 bool isSCSrc_b64() const {
482 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
483 }
484
485 bool isBoolReg() const;
486
487 bool isSCSrcF16() const {
488 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
489 }
490
491 bool isSCSrcV2F16() const {
492 return isSCSrcF16();
493 }
494
495 bool isSCSrcF32() const {
496 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
497 }
498
499 bool isSCSrcF64() const {
500 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
501 }
502
503 bool isSSrc_b32() const {
504 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
505 }
506
507 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
508
509 bool isSSrcV2B16() const {
510 llvm_unreachable("cannot happen");
511 return isSSrc_b16();
512 }
513
514 bool isSSrc_b64() const {
515 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
516 // See isVSrc64().
517 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
518 (((const MCTargetAsmParser *)AsmParser)
519 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
520 isExpr());
521 }
522
523 bool isSSrc_f32() const {
524 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
525 }
526
527 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
528
529 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
530
531 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
532
533 bool isSSrcV2F16() const {
534 llvm_unreachable("cannot happen");
535 return isSSrc_f16();
536 }
537
538 bool isSSrcV2FP32() const {
539 llvm_unreachable("cannot happen");
540 return isSSrc_f32();
541 }
542
543 bool isSCSrcV2FP32() const {
544 llvm_unreachable("cannot happen");
545 return isSCSrcF32();
546 }
547
548 bool isSSrcV2INT32() const {
549 llvm_unreachable("cannot happen");
550 return isSSrc_b32();
551 }
552
553 bool isSCSrcV2INT32() const {
554 llvm_unreachable("cannot happen");
555 return isSCSrc_b32();
556 }
557
558 bool isSSrcOrLds_b32() const {
559 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
560 isLiteralImm(MVT::i32) || isExpr();
561 }
562
563 bool isVCSrc_b32() const {
564 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
565 }
566
567 bool isVCSrc_b32_Lo256() const {
568 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
569 }
570
571 bool isVCSrc_b64_Lo256() const {
572 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
573 }
574
575 bool isVCSrc_b64() const {
576 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
577 }
578
579 bool isVCSrcT_b16() const {
580 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
581 }
582
583 bool isVCSrcTB16_Lo128() const {
584 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
585 }
586
587 bool isVCSrcFake16B16_Lo128() const {
588 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
589 }
590
591 bool isVCSrc_b16() const {
592 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
593 }
594
595 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
596
597 bool isVCSrc_f32() const {
598 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
599 }
600
601 bool isVCSrc_f64() const {
602 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
603 }
604
605 bool isVCSrcTBF16() const {
606 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
607 }
608
609 bool isVCSrcT_f16() const {
610 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
611 }
612
613 bool isVCSrcT_bf16() const {
614 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
615 }
616
617 bool isVCSrcTBF16_Lo128() const {
618 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
619 }
620
621 bool isVCSrcTF16_Lo128() const {
622 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
623 }
624
625 bool isVCSrcFake16BF16_Lo128() const {
626 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
627 }
628
629 bool isVCSrcFake16F16_Lo128() const {
630 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
631 }
632
633 bool isVCSrc_bf16() const {
634 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
635 }
636
637 bool isVCSrc_f16() const {
638 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
639 }
640
641 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
642
643 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
644
645 bool isVSrc_b32() const {
646 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
647 }
648
649 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
650
651 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
652
653 bool isVSrcT_b16_Lo128() const {
654 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
655 }
656
657 bool isVSrcFake16_b16_Lo128() const {
658 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
659 }
660
661 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
662
663 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
664
665 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
666
667 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
668
669 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
670
671 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
672
673 bool isVSrc_f32() const {
674 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
675 }
676
677 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
678
679 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
680
681 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
682
683 bool isVSrcT_bf16_Lo128() const {
684 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
685 }
686
687 bool isVSrcT_f16_Lo128() const {
688 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
689 }
690
691 bool isVSrcFake16_bf16_Lo128() const {
692 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
693 }
694
695 bool isVSrcFake16_f16_Lo128() const {
696 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
697 }
698
699 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
700
701 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
702
703 bool isVSrc_v2bf16() const {
704 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
705 }
706
707 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
708
709 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
710
711 bool isVISrcB32() const {
712 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
713 }
714
715 bool isVISrcB16() const {
716 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
717 }
718
719 bool isVISrcV2B16() const {
720 return isVISrcB16();
721 }
722
723 bool isVISrcF32() const {
724 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
725 }
726
727 bool isVISrcF16() const {
728 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
729 }
730
731 bool isVISrcV2F16() const {
732 return isVISrcF16() || isVISrcB32();
733 }
734
735 bool isVISrc_64_bf16() const {
736 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
737 }
738
739 bool isVISrc_64_f16() const {
740 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
741 }
742
743 bool isVISrc_64_b32() const {
744 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
745 }
746
747 bool isVISrc_64B64() const {
748 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
749 }
750
751 bool isVISrc_64_f64() const {
752 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
753 }
754
755 bool isVISrc_64V2FP32() const {
756 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
757 }
758
759 bool isVISrc_64V2INT32() const {
760 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
761 }
762
763 bool isVISrc_256_b32() const {
764 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
765 }
766
767 bool isVISrc_256_f32() const {
768 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
769 }
770
771 bool isVISrc_256B64() const {
772 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
773 }
774
775 bool isVISrc_256_f64() const {
776 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
777 }
778
779 bool isVISrc_512_f64() const {
780 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
781 }
782
783 bool isVISrc_128B16() const {
784 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
785 }
786
787 bool isVISrc_128V2B16() const {
788 return isVISrc_128B16();
789 }
790
791 bool isVISrc_128_b32() const {
792 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
793 }
794
795 bool isVISrc_128_f32() const {
796 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
797 }
798
799 bool isVISrc_256V2FP32() const {
800 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
801 }
802
803 bool isVISrc_256V2INT32() const {
804 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
805 }
806
807 bool isVISrc_512_b32() const {
808 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
809 }
810
811 bool isVISrc_512B16() const {
812 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
813 }
814
815 bool isVISrc_512V2B16() const {
816 return isVISrc_512B16();
817 }
818
819 bool isVISrc_512_f32() const {
820 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
821 }
822
823 bool isVISrc_512F16() const {
824 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
825 }
826
827 bool isVISrc_512V2F16() const {
828 return isVISrc_512F16() || isVISrc_512_b32();
829 }
830
831 bool isVISrc_1024_b32() const {
832 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
833 }
834
835 bool isVISrc_1024B16() const {
836 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
837 }
838
839 bool isVISrc_1024V2B16() const {
840 return isVISrc_1024B16();
841 }
842
843 bool isVISrc_1024_f32() const {
844 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
845 }
846
847 bool isVISrc_1024F16() const {
848 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
849 }
850
851 bool isVISrc_1024V2F16() const {
852 return isVISrc_1024F16() || isVISrc_1024_b32();
853 }
854
855 bool isAISrcB32() const {
856 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
857 }
858
859 bool isAISrcB16() const {
860 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
861 }
862
863 bool isAISrcV2B16() const {
864 return isAISrcB16();
865 }
866
867 bool isAISrcF32() const {
868 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
869 }
870
871 bool isAISrcF16() const {
872 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
873 }
874
875 bool isAISrcV2F16() const {
876 return isAISrcF16() || isAISrcB32();
877 }
878
879 bool isAISrc_64B64() const {
880 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
881 }
882
883 bool isAISrc_64_f64() const {
884 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
885 }
886
887 bool isAISrc_128_b32() const {
888 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
889 }
890
891 bool isAISrc_128B16() const {
892 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
893 }
894
895 bool isAISrc_128V2B16() const {
896 return isAISrc_128B16();
897 }
898
899 bool isAISrc_128_f32() const {
900 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
901 }
902
903 bool isAISrc_128F16() const {
904 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
905 }
906
907 bool isAISrc_128V2F16() const {
908 return isAISrc_128F16() || isAISrc_128_b32();
909 }
910
911 bool isVISrc_128_bf16() const {
912 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
913 }
914
915 bool isVISrc_128_f16() const {
916 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
917 }
918
919 bool isVISrc_128V2F16() const {
920 return isVISrc_128_f16() || isVISrc_128_b32();
921 }
922
923 bool isAISrc_256B64() const {
924 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
925 }
926
927 bool isAISrc_256_f64() const {
928 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
929 }
930
931 bool isAISrc_512_b32() const {
932 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
933 }
934
935 bool isAISrc_512B16() const {
936 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
937 }
938
939 bool isAISrc_512V2B16() const {
940 return isAISrc_512B16();
941 }
942
943 bool isAISrc_512_f32() const {
944 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
945 }
946
947 bool isAISrc_512F16() const {
948 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
949 }
950
951 bool isAISrc_512V2F16() const {
952 return isAISrc_512F16() || isAISrc_512_b32();
953 }
954
955 bool isAISrc_1024_b32() const {
956 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
957 }
958
959 bool isAISrc_1024B16() const {
960 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
961 }
962
963 bool isAISrc_1024V2B16() const {
964 return isAISrc_1024B16();
965 }
966
967 bool isAISrc_1024_f32() const {
968 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
969 }
970
971 bool isAISrc_1024F16() const {
972 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
973 }
974
975 bool isAISrc_1024V2F16() const {
976 return isAISrc_1024F16() || isAISrc_1024_b32();
977 }
978
979 bool isKImmFP32() const {
980 return isLiteralImm(MVT::f32);
981 }
982
983 bool isKImmFP16() const {
984 return isLiteralImm(MVT::f16);
985 }
986
987 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
988
989 bool isMem() const override {
990 return false;
991 }
992
993 bool isExpr() const {
994 return Kind == Expression;
995 }
996
997 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
998
999 bool isSWaitCnt() const;
1000 bool isDepCtr() const;
1001 bool isSDelayALU() const;
1002 bool isHwreg() const;
1003 bool isSendMsg() const;
1004 bool isSplitBarrier() const;
1005 bool isSwizzle() const;
1006 bool isSMRDOffset8() const;
1007 bool isSMEMOffset() const;
1008 bool isSMRDLiteralOffset() const;
1009 bool isDPP8() const;
1010 bool isDPPCtrl() const;
1011 bool isBLGP() const;
1012 bool isGPRIdxMode() const;
1013 bool isS16Imm() const;
1014 bool isU16Imm() const;
1015 bool isEndpgm() const;
1016
1017 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
1018 return [this, P]() { return P(*this); };
1019 }
1020
1021 StringRef getToken() const {
1022 assert(isToken());
1023 return StringRef(Tok.Data, Tok.Length);
1024 }
1025
1026 int64_t getImm() const {
1027 assert(isImm());
1028 return Imm.Val;
1029 }
1030
1031 void setImm(int64_t Val) {
1032 assert(isImm());
1033 Imm.Val = Val;
1034 }
1035
1036 ImmTy getImmTy() const {
1037 assert(isImm());
1038 return Imm.Type;
1039 }
1040
1041 MCRegister getReg() const override {
1042 assert(isRegKind());
1043 return Reg.RegNo;
1044 }
1045
1046 SMLoc getStartLoc() const override {
1047 return StartLoc;
1048 }
1049
1050 SMLoc getEndLoc() const override {
1051 return EndLoc;
1052 }
1053
1054 SMRange getLocRange() const {
1055 return SMRange(StartLoc, EndLoc);
1056 }
1057
1058 Modifiers getModifiers() const {
1059 assert(isRegKind() || isImmTy(ImmTyNone));
1060 return isRegKind() ? Reg.Mods : Imm.Mods;
1061 }
1062
1063 void setModifiers(Modifiers Mods) {
1064 assert(isRegKind() || isImmTy(ImmTyNone));
1065 if (isRegKind())
1066 Reg.Mods = Mods;
1067 else
1068 Imm.Mods = Mods;
1069 }
1070
1071 bool hasModifiers() const {
1072 return getModifiers().hasModifiers();
1073 }
1074
1075 bool hasFPModifiers() const {
1076 return getModifiers().hasFPModifiers();
1077 }
1078
1079 bool hasIntModifiers() const {
1080 return getModifiers().hasIntModifiers();
1081 }
1082
1083 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1084
1085 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1086
1087 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1088
1089 void addRegOperands(MCInst &Inst, unsigned N) const;
1090
1091 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1092 if (isRegKind())
1093 addRegOperands(Inst, N);
1094 else
1095 addImmOperands(Inst, N);
1096 }
1097
1098 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1099 Modifiers Mods = getModifiers();
1100 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1101 if (isRegKind()) {
1102 addRegOperands(Inst, N);
1103 } else {
1104 addImmOperands(Inst, N, false);
1105 }
1106 }
1107
1108 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1109 assert(!hasIntModifiers());
1110 addRegOrImmWithInputModsOperands(Inst, N);
1111 }
1112
1113 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1114 assert(!hasFPModifiers());
1115 addRegOrImmWithInputModsOperands(Inst, N);
1116 }
1117
1118 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1119 Modifiers Mods = getModifiers();
1120 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1121 assert(isRegKind());
1122 addRegOperands(Inst, N);
1123 }
1124
1125 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1126 assert(!hasIntModifiers());
1127 addRegWithInputModsOperands(Inst, N);
1128 }
1129
1130 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1131 assert(!hasFPModifiers());
1132 addRegWithInputModsOperands(Inst, N);
1133 }
1134
1135 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1136 // clang-format off
1137 switch (Type) {
1138 case ImmTyNone: OS << "None"; break;
1139 case ImmTyGDS: OS << "GDS"; break;
1140 case ImmTyLDS: OS << "LDS"; break;
1141 case ImmTyOffen: OS << "Offen"; break;
1142 case ImmTyIdxen: OS << "Idxen"; break;
1143 case ImmTyAddr64: OS << "Addr64"; break;
1144 case ImmTyOffset: OS << "Offset"; break;
1145 case ImmTyInstOffset: OS << "InstOffset"; break;
1146 case ImmTyOffset0: OS << "Offset0"; break;
1147 case ImmTyOffset1: OS << "Offset1"; break;
1148 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1149 case ImmTyCPol: OS << "CPol"; break;
1150 case ImmTyIndexKey8bit: OS << "index_key"; break;
1151 case ImmTyIndexKey16bit: OS << "index_key"; break;
1152 case ImmTyIndexKey32bit: OS << "index_key"; break;
1153 case ImmTyTFE: OS << "TFE"; break;
1154 case ImmTyD16: OS << "D16"; break;
1155 case ImmTyFORMAT: OS << "FORMAT"; break;
1156 case ImmTyClamp: OS << "Clamp"; break;
1157 case ImmTyOModSI: OS << "OModSI"; break;
1158 case ImmTyDPP8: OS << "DPP8"; break;
1159 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1160 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1161 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1162 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1163 case ImmTyDppFI: OS << "DppFI"; break;
1164 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1165 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1166 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1167 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1168 case ImmTyDMask: OS << "DMask"; break;
1169 case ImmTyDim: OS << "Dim"; break;
1170 case ImmTyUNorm: OS << "UNorm"; break;
1171 case ImmTyDA: OS << "DA"; break;
1172 case ImmTyR128A16: OS << "R128A16"; break;
1173 case ImmTyA16: OS << "A16"; break;
1174 case ImmTyLWE: OS << "LWE"; break;
1175 case ImmTyOff: OS << "Off"; break;
1176 case ImmTyExpTgt: OS << "ExpTgt"; break;
1177 case ImmTyExpCompr: OS << "ExpCompr"; break;
1178 case ImmTyExpVM: OS << "ExpVM"; break;
1179 case ImmTyHwreg: OS << "Hwreg"; break;
1180 case ImmTySendMsg: OS << "SendMsg"; break;
1181 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1182 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1183 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1184 case ImmTyOpSel: OS << "OpSel"; break;
1185 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1186 case ImmTyNegLo: OS << "NegLo"; break;
1187 case ImmTyNegHi: OS << "NegHi"; break;
1188 case ImmTySwizzle: OS << "Swizzle"; break;
1189 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1190 case ImmTyHigh: OS << "High"; break;
1191 case ImmTyBLGP: OS << "BLGP"; break;
1192 case ImmTyCBSZ: OS << "CBSZ"; break;
1193 case ImmTyABID: OS << "ABID"; break;
1194 case ImmTyEndpgm: OS << "Endpgm"; break;
1195 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1196 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1197 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1198 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1199 case ImmTyBitOp3: OS << "BitOp3"; break;
1200 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1201 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1202 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1203 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1204 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1205 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1206 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1207 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1208 case ImmTyScaleSel: OS << "ScaleSel" ; break;
1209 case ImmTyByteSel: OS << "ByteSel" ; break;
1210 }
1211 // clang-format on
1212 }
1213
1214 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1215 switch (Kind) {
1216 case Register:
1217 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1218 << " mods: " << Reg.Mods << '>';
1219 break;
1220 case Immediate:
1221 OS << '<' << getImm();
1222 if (getImmTy() != ImmTyNone) {
1223 OS << " type: "; printImmTy(OS, getImmTy());
1224 }
1225 OS << " mods: " << Imm.Mods << '>';
1226 break;
1227 case Token:
1228 OS << '\'' << getToken() << '\'';
1229 break;
1230 case Expression:
1231 OS << "<expr ";
1232 MAI.printExpr(OS, *Expr);
1233 OS << '>';
1234 break;
1235 }
1236 }
1237
1238 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1239 int64_t Val, SMLoc Loc,
1240 ImmTy Type = ImmTyNone,
1241 bool IsFPImm = false) {
1242 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1243 Op->Imm.Val = Val;
1244 Op->Imm.IsFPImm = IsFPImm;
1245 Op->Imm.Kind = ImmKindTyNone;
1246 Op->Imm.Type = Type;
1247 Op->Imm.Mods = Modifiers();
1248 Op->StartLoc = Loc;
1249 Op->EndLoc = Loc;
1250 return Op;
1251 }
1252
1253 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1254 StringRef Str, SMLoc Loc,
1255 bool HasExplicitEncodingSize = true) {
1256 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1257 Res->Tok.Data = Str.data();
1258 Res->Tok.Length = Str.size();
1259 Res->StartLoc = Loc;
1260 Res->EndLoc = Loc;
1261 return Res;
1262 }
1263
1264 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1265 MCRegister Reg, SMLoc S, SMLoc E) {
1266 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1267 Op->Reg.RegNo = Reg;
1268 Op->Reg.Mods = Modifiers();
1269 Op->StartLoc = S;
1270 Op->EndLoc = E;
1271 return Op;
1272 }
1273
1274 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1275 const class MCExpr *Expr, SMLoc S) {
1276 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1277 Op->Expr = Expr;
1278 Op->StartLoc = S;
1279 Op->EndLoc = S;
1280 return Op;
1281 }
1282};
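// Illustrative usage sketch (hypothetical call site, following the pattern
// used by this parser's helpers): operands are materialized through the
// factory methods above, e.g.
//   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
//                                               AMDGPUOperand::ImmTyOffset));
// which records the value, its immediate type and its source location for
// later diagnostics.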
1283
1284raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1285 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1286 return OS;
1287}
1288
1289//===----------------------------------------------------------------------===//
1290// AsmParser
1291//===----------------------------------------------------------------------===//
1292
1293// Holds info related to the current kernel, e.g. count of SGPRs used.
1294// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1295// .amdgpu_hsa_kernel or at EOF.
1296class KernelScopeInfo {
1297 int SgprIndexUnusedMin = -1;
1298 int VgprIndexUnusedMin = -1;
1299 int AgprIndexUnusedMin = -1;
1300 MCContext *Ctx = nullptr;
1301 MCSubtargetInfo const *MSTI = nullptr;
1302
1303 void usesSgprAt(int i) {
1304 if (i >= SgprIndexUnusedMin) {
1305 SgprIndexUnusedMin = ++i;
1306 if (Ctx) {
1307 MCSymbol* const Sym =
1308 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1309 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1310 }
1311 }
1312 }
1313
1314 void usesVgprAt(int i) {
1315 if (i >= VgprIndexUnusedMin) {
1316 VgprIndexUnusedMin = ++i;
1317 if (Ctx) {
1318 MCSymbol* const Sym =
1319 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1320 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1321 VgprIndexUnusedMin);
1322 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1323 }
1324 }
1325 }
1326
1327 void usesAgprAt(int i) {
1328 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1329 if (!hasMAIInsts(*MSTI))
1330 return;
1331
1332 if (i >= AgprIndexUnusedMin) {
1333 AgprIndexUnusedMin = ++i;
1334 if (Ctx) {
1335 MCSymbol* const Sym =
1336 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1337 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1338
1339 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1340 MCSymbol* const vSym =
1341 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1342 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1343 VgprIndexUnusedMin);
1344 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1345 }
1346 }
1347 }
1348
1349public:
1350 KernelScopeInfo() = default;
1351
1352 void initialize(MCContext &Context) {
1353 Ctx = &Context;
1354 MSTI = Ctx->getSubtargetInfo();
1355
1356 usesSgprAt(SgprIndexUnusedMin = -1);
1357 usesVgprAt(VgprIndexUnusedMin = -1);
1358 if (hasMAIInsts(*MSTI)) {
1359 usesAgprAt(AgprIndexUnusedMin = -1);
1360 }
1361 }
1362
1363 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1364 unsigned RegWidth) {
1365 switch (RegKind) {
1366 case IS_SGPR:
1367 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1368 break;
1369 case IS_AGPR:
1370 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1371 break;
1372 case IS_VGPR:
1373 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1374 break;
1375 default:
1376 break;
1377 }
1378 }
1379};
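// For illustration (assumed example, not from the listing): after parsing
// "v_mov_b32 v7, s3", usesRegister() is called with (IS_VGPR, 7, 32) and
// (IS_SGPR, 3, 32); on a target where AGPRs do not share the VGPR budget this
// raises .kernel.vgpr_count to 8 and .kernel.sgpr_count to 4 (register
// indices are zero-based, the symbols hold counts).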
1380
1381class AMDGPUAsmParser : public MCTargetAsmParser {
1382 MCAsmParser &Parser;
1383
1384 unsigned ForcedEncodingSize = 0;
1385 bool ForcedDPP = false;
1386 bool ForcedSDWA = false;
1387 KernelScopeInfo KernelScope;
1388
1389 /// @name Auto-generated Match Functions
1390 /// {
1391
1392#define GET_ASSEMBLER_HEADER
1393#include "AMDGPUGenAsmMatcher.inc"
1394
1395 /// }
1396
1397private:
1398 void createConstantSymbol(StringRef Id, int64_t Val);
1399
1400 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1401 bool OutOfRangeError(SMRange Range);
1402 /// Calculate VGPR/SGPR blocks required for given target, reserved
1403 /// registers, and user-specified NextFreeXGPR values.
1404 ///
1405 /// \param Features [in] Target features, used for bug corrections.
1406 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1407 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1408 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1409 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1410 /// descriptor field, if valid.
1411 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1412 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1413 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1414 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1415 /// \param VGPRBlocks [out] Result VGPR block count.
1416 /// \param SGPRBlocks [out] Result SGPR block count.
1417 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1418 const MCExpr *FlatScrUsed, bool XNACKUsed,
1419 std::optional<bool> EnableWavefrontSize32,
1420 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1421 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1422 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1423 bool ParseDirectiveAMDGCNTarget();
1424 bool ParseDirectiveAMDHSACodeObjectVersion();
1425 bool ParseDirectiveAMDHSAKernel();
1426 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1427 bool ParseDirectiveAMDKernelCodeT();
1428 // TODO: Possibly make subtargetHasRegister const.
1429 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1430 bool ParseDirectiveAMDGPUHsaKernel();
1431
1432 bool ParseDirectiveISAVersion();
1433 bool ParseDirectiveHSAMetadata();
1434 bool ParseDirectivePALMetadataBegin();
1435 bool ParseDirectivePALMetadata();
1436 bool ParseDirectiveAMDGPULDS();
1437
1438 /// Common code to parse out a block of text (typically YAML) between start and
1439 /// end directives.
1440 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1441 const char *AssemblerDirectiveEnd,
1442 std::string &CollectString);
1443
1444 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1445 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1446 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1447 unsigned &RegNum, unsigned &RegWidth,
1448 bool RestoreOnFailure = false);
 1449 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
 1450 unsigned &RegNum, unsigned &RegWidth,
 1451 SmallVectorImpl<AsmToken> &Tokens);
 1452 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
 1453 unsigned &RegWidth,
 1454 SmallVectorImpl<AsmToken> &Tokens);
 1455 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
 1456 unsigned &RegWidth,
 1457 SmallVectorImpl<AsmToken> &Tokens);
 1458 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
 1459 unsigned &RegWidth,
 1460 SmallVectorImpl<AsmToken> &Tokens);
1461 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1462 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1463 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1464
1465 bool isRegister();
1466 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1467 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1468 void initializeGprCountSymbol(RegisterKind RegKind);
1469 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1470 unsigned RegWidth);
1471 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1472 bool IsAtomic);
1473
1474public:
1475 enum OperandMode {
1476 OperandMode_Default,
1477 OperandMode_NSA,
1478 };
1479
1480 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1481
1482 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1483 const MCInstrInfo &MII,
1484 const MCTargetOptions &Options)
 1485 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
 1486 MCAsmParserExtension::Initialize(Parser);
 1487
1488 if (getFeatureBits().none()) {
1489 // Set default features.
1490 copySTI().ToggleFeature("southern-islands");
1491 }
1492
1493 FeatureBitset FB = getFeatureBits();
1494 if (!FB[AMDGPU::FeatureWavefrontSize64] &&
1495 !FB[AMDGPU::FeatureWavefrontSize32]) {
1496 // If there is no default wave size it must be a generation before gfx10,
1497 // these have FeatureWavefrontSize64 in their definition already. For
1498 // gfx10+ set wave32 as a default.
1499 copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
1500 }
1501
1502 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1503
1504 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1505 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1506 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1507 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1508 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1509 } else {
1510 createConstantSymbol(".option.machine_version_major", ISA.Major);
1511 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1512 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1513 }
1514 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1515 initializeGprCountSymbol(IS_VGPR);
1516 initializeGprCountSymbol(IS_SGPR);
1517 } else
1518 KernelScope.initialize(getContext());
1519
1520 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1521 createConstantSymbol(Symbol, Code);
1522
1523 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1524 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1525 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1526 }
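// Illustrative note (assumed example): assembling for gfx90a under the HSA
// ABI defines .amdgcn.gfx_generation_number = 9, .amdgcn.gfx_generation_minor
// = 0 and .amdgcn.gfx_generation_stepping = 10, so assembly sources can test
// the target with .if directives against these symbols.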
1527
1528 bool hasMIMG_R128() const {
1529 return AMDGPU::hasMIMG_R128(getSTI());
1530 }
1531
1532 bool hasPackedD16() const {
1533 return AMDGPU::hasPackedD16(getSTI());
1534 }
1535
1536 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1537
1538 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1539
1540 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1541
1542 bool isSI() const {
1543 return AMDGPU::isSI(getSTI());
1544 }
1545
1546 bool isCI() const {
1547 return AMDGPU::isCI(getSTI());
1548 }
1549
1550 bool isVI() const {
1551 return AMDGPU::isVI(getSTI());
1552 }
1553
1554 bool isGFX9() const {
1555 return AMDGPU::isGFX9(getSTI());
1556 }
1557
1558 // TODO: isGFX90A is also true for GFX940. We need to clean it.
1559 bool isGFX90A() const {
1560 return AMDGPU::isGFX90A(getSTI());
1561 }
1562
1563 bool isGFX940() const {
1564 return AMDGPU::isGFX940(getSTI());
1565 }
1566
1567 bool isGFX9Plus() const {
1568 return AMDGPU::isGFX9Plus(getSTI());
1569 }
1570
1571 bool isGFX10() const {
1572 return AMDGPU::isGFX10(getSTI());
1573 }
1574
1575 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1576
1577 bool isGFX11() const {
1578 return AMDGPU::isGFX11(getSTI());
1579 }
1580
1581 bool isGFX11Plus() const {
1582 return AMDGPU::isGFX11Plus(getSTI());
1583 }
1584
1585 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1586
1587 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1588
1589 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1590
1591 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1592
1593 bool isGFX10_BEncoding() const {
1594 return AMDGPU::isGFX10_BEncoding(getSTI());
1595 }
1596
1597 bool hasInv2PiInlineImm() const {
1598 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1599 }
1600
1601 bool has64BitLiterals() const {
1602 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1603 }
1604
1605 bool hasFlatOffsets() const {
1606 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1607 }
1608
1609 bool hasTrue16Insts() const {
1610 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1611 }
1612
1613 bool hasArchitectedFlatScratch() const {
1614 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1615 }
1616
1617 bool hasSGPR102_SGPR103() const {
1618 return !isVI() && !isGFX9();
1619 }
1620
1621 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1622
1623 bool hasIntClamp() const {
1624 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1625 }
1626
1627 bool hasPartialNSAEncoding() const {
1628 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1629 }
1630
1631 bool hasGloballyAddressableScratch() const {
1632 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1633 }
1634
1635 unsigned getNSAMaxSize(bool HasSampler = false) const {
1636 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1637 }
1638
1639 unsigned getMaxNumUserSGPRs() const {
1640 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1641 }
1642
1643 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1644
1645 AMDGPUTargetStreamer &getTargetStreamer() {
1646 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1647 return static_cast<AMDGPUTargetStreamer &>(TS);
1648 }
1649
1650 const MCRegisterInfo *getMRI() const {
1651 // We need this const_cast because for some reason getContext() is not const
1652 // in MCAsmParser.
1653 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1654 }
1655
1656 const MCInstrInfo *getMII() const {
1657 return &MII;
1658 }
1659
1660 const FeatureBitset &getFeatureBits() const {
1661 return getSTI().getFeatureBits();
1662 }
1663
1664 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1665 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1666 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1667
1668 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1669 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1670 bool isForcedDPP() const { return ForcedDPP; }
1671 bool isForcedSDWA() const { return ForcedSDWA; }
1672 ArrayRef<unsigned> getMatchedVariants() const;
1673 StringRef getMatchedVariantName() const;
1674
1675 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1676 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1677 bool RestoreOnFailure);
1678 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1679 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1680 SMLoc &EndLoc) override;
1681 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1682 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1683 unsigned Kind) override;
1684 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1685 OperandVector &Operands, MCStreamer &Out,
1686 uint64_t &ErrorInfo,
1687 bool MatchingInlineAsm) override;
1688 bool ParseDirective(AsmToken DirectiveID) override;
1689 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1690 OperandMode Mode = OperandMode_Default);
1691 StringRef parseMnemonicSuffix(StringRef Name);
1692 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1693 SMLoc NameLoc, OperandVector &Operands) override;
1694 //bool ProcessInstruction(MCInst &Inst);
1695
1696 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1697
1698 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1699
1700 ParseStatus
1701 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1702 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1703 std::function<bool(int64_t &)> ConvertResult = nullptr);
1704
1705 ParseStatus parseOperandArrayWithPrefix(
1706 const char *Prefix, OperandVector &Operands,
1707 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1708 bool (*ConvertResult)(int64_t &) = nullptr);
1709
1710 ParseStatus
1711 parseNamedBit(StringRef Name, OperandVector &Operands,
1712 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1713 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1714 ParseStatus parseCPol(OperandVector &Operands);
1715 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1716 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1717 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1718 SMLoc &StringLoc);
1719 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1720 StringRef Name,
1721 ArrayRef<const char *> Ids,
1722 int64_t &IntVal);
1723 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1724 StringRef Name,
1725 ArrayRef<const char *> Ids,
1726 AMDGPUOperand::ImmTy Type);
1727
1728 bool isModifier();
1729 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1730 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1731 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1732 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1733 bool parseSP3NegModifier();
1734 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1735 bool HasLit = false, bool HasLit64 = false);
1736 ParseStatus parseReg(OperandVector &Operands);
1737 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1738 bool HasLit = false, bool HasLit64 = false);
1739 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1740 bool AllowImm = true);
1741 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1742 bool AllowImm = true);
1743 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1744 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1745 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1746 ParseStatus tryParseIndexKey(OperandVector &Operands,
1747 AMDGPUOperand::ImmTy ImmTy);
1748 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1749 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1750 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1751 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1752 AMDGPUOperand::ImmTy Type);
1753 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1754 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1755 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1756 AMDGPUOperand::ImmTy Type);
1757 ParseStatus parseMatrixAScale(OperandVector &Operands);
1758 ParseStatus parseMatrixBScale(OperandVector &Operands);
1759 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1760 AMDGPUOperand::ImmTy Type);
1761 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1762 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1763
1764 ParseStatus parseDfmtNfmt(int64_t &Format);
1765 ParseStatus parseUfmt(int64_t &Format);
1766 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1767 int64_t &Format);
1768 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1769 int64_t &Format);
1770 ParseStatus parseFORMAT(OperandVector &Operands);
1771 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1772 ParseStatus parseNumericFormat(int64_t &Format);
1773 ParseStatus parseFlatOffset(OperandVector &Operands);
1774 ParseStatus parseR128A16(OperandVector &Operands);
1775 ParseStatus parseBLGP(OperandVector &Operands);
1776 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1777 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1778
1779 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1780
1781 bool parseCnt(int64_t &IntVal);
1782 ParseStatus parseSWaitCnt(OperandVector &Operands);
1783
1784 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1785 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1786 ParseStatus parseDepCtr(OperandVector &Operands);
1787
1788 bool parseDelay(int64_t &Delay);
1789 ParseStatus parseSDelayALU(OperandVector &Operands);
1790
1791 ParseStatus parseHwreg(OperandVector &Operands);
1792
1793private:
1794 struct OperandInfoTy {
1795 SMLoc Loc;
1796 int64_t Val;
1797 bool IsSymbolic = false;
1798 bool IsDefined = false;
1799
1800 OperandInfoTy(int64_t Val) : Val(Val) {}
1801 };
1802
1803 struct StructuredOpField : OperandInfoTy {
1804 StringLiteral Id;
1805 StringLiteral Desc;
1806 unsigned Width;
1807 bool IsDefined = false;
1808
1809 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1810 int64_t Default)
1811 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1812 virtual ~StructuredOpField() = default;
1813
1814 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1815 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1816 return false;
1817 }
1818
1819 virtual bool validate(AMDGPUAsmParser &Parser) const {
1820 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1821 return Error(Parser, "not supported on this GPU");
1822 if (!isUIntN(Width, Val))
1823 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1824 return true;
1825 }
1826 };
1827
1828 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1829 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1830
1831 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1832 bool validateSendMsg(const OperandInfoTy &Msg,
1833 const OperandInfoTy &Op,
1834 const OperandInfoTy &Stream);
1835
1836 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1837 OperandInfoTy &Width);
1838
1839 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1840 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1841 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1842
1843 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1844 const OperandVector &Operands) const;
1845 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1846 SMLoc getRegLoc(MCRegister Reg, const OperandVector &Operands) const;
1847 SMLoc getLitLoc(const OperandVector &Operands,
1848 bool SearchMandatoryLiterals = false) const;
1849 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1850 SMLoc getConstLoc(const OperandVector &Operands) const;
1851 SMLoc getInstLoc(const OperandVector &Operands) const;
1852
1853 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1854 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1855 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1856 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1857 bool validateSOPLiteral(const MCInst &Inst) const;
1858 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1859 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1860 bool AsVOPD3);
1861 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1862 bool tryVOPD(const MCInst &Inst);
1863 bool tryVOPD3(const MCInst &Inst);
1864 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1865
1866 bool validateIntClampSupported(const MCInst &Inst);
1867 bool validateMIMGAtomicDMask(const MCInst &Inst);
1868 bool validateMIMGGatherDMask(const MCInst &Inst);
1869 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1870 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1871 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1872 bool validateMIMGD16(const MCInst &Inst);
1873 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1874 bool validateTensorR128(const MCInst &Inst);
1875 bool validateMIMGMSAA(const MCInst &Inst);
1876 bool validateOpSel(const MCInst &Inst);
1877 bool validateTrue16OpSel(const MCInst &Inst);
1878 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1879 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1880 bool validateVccOperand(MCRegister Reg) const;
1881 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1882 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1883 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1884 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1885 bool validateAGPRLdSt(const MCInst &Inst) const;
1886 bool validateVGPRAlign(const MCInst &Inst) const;
1887 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1888 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1889 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1890 bool validateDivScale(const MCInst &Inst);
1891 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1892 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1893 const SMLoc &IDLoc);
1894 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1895 const unsigned CPol);
1896 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1897 bool validateSetVgprMSB(const MCInst &Inst, const OperandVector &Operands);
1898 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1899 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1900 unsigned getConstantBusLimit(unsigned Opcode) const;
1901 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1902 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1903 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1904
1905 bool isSupportedMnemo(StringRef Mnemo,
1906 const FeatureBitset &FBS);
1907 bool isSupportedMnemo(StringRef Mnemo,
1908 const FeatureBitset &FBS,
1909 ArrayRef<unsigned> Variants);
1910 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1911
1912 bool isId(const StringRef Id) const;
1913 bool isId(const AsmToken &Token, const StringRef Id) const;
1914 bool isToken(const AsmToken::TokenKind Kind) const;
1915 StringRef getId() const;
1916 bool trySkipId(const StringRef Id);
1917 bool trySkipId(const StringRef Pref, const StringRef Id);
1918 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1919 bool trySkipToken(const AsmToken::TokenKind Kind);
1920 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1921 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1922 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1923
1924 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1925 AsmToken::TokenKind getTokenKind() const;
1926 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1928 StringRef getTokenStr() const;
1929 AsmToken peekToken(bool ShouldSkipSpace = true);
1930 AsmToken getToken() const;
1931 SMLoc getLoc() const;
1932 void lex();
1933
1934public:
1935 void onBeginOfFile() override;
1936 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1937
1938 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1939
1940 ParseStatus parseExpTgt(OperandVector &Operands);
1941 ParseStatus parseSendMsg(OperandVector &Operands);
1942 ParseStatus parseInterpSlot(OperandVector &Operands);
1943 ParseStatus parseInterpAttr(OperandVector &Operands);
1944 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1945 ParseStatus parseBoolReg(OperandVector &Operands);
1946
1947 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1948 const unsigned MaxVal, const Twine &ErrMsg,
1949 SMLoc &Loc);
1950 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1951 const unsigned MinVal,
1952 const unsigned MaxVal,
1953 const StringRef ErrMsg);
1954 ParseStatus parseSwizzle(OperandVector &Operands);
1955 bool parseSwizzleOffset(int64_t &Imm);
1956 bool parseSwizzleMacro(int64_t &Imm);
1957 bool parseSwizzleQuadPerm(int64_t &Imm);
1958 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1959 bool parseSwizzleBroadcast(int64_t &Imm);
1960 bool parseSwizzleSwap(int64_t &Imm);
1961 bool parseSwizzleReverse(int64_t &Imm);
1962 bool parseSwizzleFFT(int64_t &Imm);
1963 bool parseSwizzleRotate(int64_t &Imm);
1964
1965 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1966 int64_t parseGPRIdxMacro();
1967
1968 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1969 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1970
1971 ParseStatus parseOModSI(OperandVector &Operands);
1972
1973 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1974 OptionalImmIndexMap &OptionalIdx);
1975 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1976 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1977 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1978 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1979 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1980
1981 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1982 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1983 OptionalImmIndexMap &OptionalIdx);
1984 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1985 OptionalImmIndexMap &OptionalIdx);
1986
1987 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1988 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1989
1990 bool parseDimId(unsigned &Encoding);
1991 ParseStatus parseDim(OperandVector &Operands);
1992 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1993 ParseStatus parseDPP8(OperandVector &Operands);
1994 ParseStatus parseDPPCtrl(OperandVector &Operands);
1995 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1996 int64_t parseDPPCtrlSel(StringRef Ctrl);
1997 int64_t parseDPPCtrlPerm();
1998 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1999 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
2000 cvtDPP(Inst, Operands, true);
2001 }
2002 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
2003 bool IsDPP8 = false);
2004 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
2005 cvtVOP3DPP(Inst, Operands, true);
2006 }
2007
2008 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
2009 AMDGPUOperand::ImmTy Type);
2010 ParseStatus parseSDWADstUnused(OperandVector &Operands);
2011 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
2012 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
2013 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
2014 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
2015 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
2016 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
2017 uint64_t BasicInstType,
2018 bool SkipDstVcc = false,
2019 bool SkipSrcVcc = false);
2020
2021 ParseStatus parseEndpgm(OperandVector &Operands);
2022
2023 ParseStatus parseVOPD(OperandVector &Operands);
2024};
2025
2026} // end anonymous namespace
2027
2028// May be called with integer type with equivalent bitwidth.
2029static const fltSemantics *getFltSemantics(unsigned Size) {
2030 switch (Size) {
2031 case 4:
2032 return &APFloat::IEEEsingle();
2033 case 8:
2034 return &APFloat::IEEEdouble();
2035 case 2:
2036 return &APFloat::IEEEhalf();
2037 default:
2038 llvm_unreachable("unsupported fp type");
2039 }
2040}
2041
2042 static const fltSemantics *getFltSemantics(MVT VT) {
2043   return getFltSemantics(VT.getSizeInBits() / 8);
2044}
2045
2046 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
2047   switch (OperandType) {
2048   // When a floating-point immediate is used as an operand of type i16, the
2049   // 32-bit representation of the constant truncated to the 16 LSBs should be used.
2064 return &APFloat::IEEEsingle();
2071 return &APFloat::IEEEdouble();
2078 return &APFloat::IEEEhalf();
2083 return &APFloat::BFloat();
2084 default:
2085 llvm_unreachable("unsupported fp type");
2086 }
2087}
2088
2089//===----------------------------------------------------------------------===//
2090// Operand
2091//===----------------------------------------------------------------------===//
2092
2093static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2094 bool Lost;
2095
2096   // Convert literal to single precision
2097   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
2098                                                APFloat::rmNearestTiesToEven,
2099                                                &Lost);
2100   // We allow precision loss but not overflow or underflow
2101 if (Status != APFloat::opOK &&
2102 Lost &&
2103 ((Status & APFloat::opOverflow) != 0 ||
2104 (Status & APFloat::opUnderflow) != 0)) {
2105 return false;
2106 }
2107
2108 return true;
2109}
2110
2111static bool isSafeTruncation(int64_t Val, unsigned Size) {
2112 return isUIntN(Size, Val) || isIntN(Size, Val);
2113}
2114
2115static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2116 if (VT.getScalarType() == MVT::i16)
2117 return isInlinableLiteral32(Val, HasInv2Pi);
2118
2119 if (VT.getScalarType() == MVT::f16)
2120 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2121
2122 assert(VT.getScalarType() == MVT::bf16);
2123
2124 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2125}
2126
2127bool AMDGPUOperand::isInlinableImm(MVT type) const {
2128
2129 // This is a hack to enable named inline values like
2130 // shared_base with both 32-bit and 64-bit operands.
2131 // Note that these values are defined as
2132 // 32-bit operands only.
2133 if (isInlineValue()) {
2134 return true;
2135 }
2136
2137 if (!isImmTy(ImmTyNone)) {
2138 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2139 return false;
2140 }
2141 // TODO: We should avoid using host float here. It would be better to
2142 // check the float bit values which is what a few other places do.
2143 // We've had bot failures before due to weird NaN support on mips hosts.
2144
2145 APInt Literal(64, Imm.Val);
2146
2147 if (Imm.IsFPImm) { // We got fp literal token
2148 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2149       return AMDGPU::isInlinableLiteral64(Imm.Val,
2150                                           AsmParser->hasInv2PiInlineImm());
2151 }
2152
2153 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2154 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2155 return false;
2156
2157 if (type.getScalarSizeInBits() == 16) {
2158 bool Lost = false;
2159 switch (type.getScalarType().SimpleTy) {
2160 default:
2161 llvm_unreachable("unknown 16-bit type");
2162 case MVT::bf16:
2163 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2164 &Lost);
2165 break;
2166 case MVT::f16:
2167 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2168 &Lost);
2169 break;
2170 case MVT::i16:
2171 FPLiteral.convert(APFloatBase::IEEEsingle(),
2172 APFloat::rmNearestTiesToEven, &Lost);
2173 break;
2174 }
2175       // We need to use the 32-bit representation here because when a
2176       // floating-point inline constant is used as an i16 operand, its 32-bit
2177       // representation will be used. We need the 32-bit value to check whether
2178       // it is an FP inline constant.
2179 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2180 return isInlineableLiteralOp16(ImmVal, type,
2181 AsmParser->hasInv2PiInlineImm());
2182 }
2183
2184 // Check if single precision literal is inlinable
2185     return AMDGPU::isInlinableLiteral32(
2186         static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2187 AsmParser->hasInv2PiInlineImm());
2188 }
2189
2190 // We got int literal token.
2191 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2192     return AMDGPU::isInlinableLiteral64(Imm.Val,
2193                                         AsmParser->hasInv2PiInlineImm());
2194 }
2195
2196 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2197 return false;
2198 }
2199
2200 if (type.getScalarSizeInBits() == 16) {
2201     return isInlineableLiteralOp16(
2202         static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2203 type, AsmParser->hasInv2PiInlineImm());
2204 }
2205
2206   return AMDGPU::isInlinableLiteral32(
2207       static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2208 AsmParser->hasInv2PiInlineImm());
2209}
2210
2211bool AMDGPUOperand::isLiteralImm(MVT type) const {
2212 // Check that this immediate can be added as literal
2213 if (!isImmTy(ImmTyNone)) {
2214 return false;
2215 }
2216
2217 bool Allow64Bit =
2218 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2219
2220 if (!Imm.IsFPImm) {
2221 // We got int literal token.
2222
2223 if (type == MVT::f64 && hasFPModifiers()) {
2224 // FP modifiers cannot be applied to integer literals while preserving the
2225 // same semantics for VOP1/2/C and VOP3 because of integer truncation. To
2226 // avoid ambiguity, disallow these cases.
2227 return false;
2228 }
2229
2230 unsigned Size = type.getSizeInBits();
2231 if (Size == 64) {
2232 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2233 return true;
2234 Size = 32;
2235 }
2236
2237 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2238 // types.
2239 return isSafeTruncation(Imm.Val, Size);
2240 }
2241
2242 // We got fp literal token
2243 if (type == MVT::f64) { // Expected 64-bit fp operand
2244 // When encoded, the low 32 bits of the literal would be set to zeroes, but we accept these literals.
2245 return true;
2246 }
2247
2248 if (type == MVT::i64) { // Expected 64-bit int operand
2249 // We don't allow fp literals in 64-bit integer instructions. It is
2250 // unclear how we should encode them.
2251 return false;
2252 }
2253
2254 // We allow fp literals with f16x2 operands assuming that the specified
2255 // literal goes into the lower half and the upper half is zero. We also
2256 // require that the literal may be losslessly converted to f16.
2257 //
2258 // For i16x2 operands, we assume that the specified literal is encoded as a
2259 // single-precision float. This is pretty odd, but it matches SP3 and what
2260 // happens in hardware.
2261 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2262 : (type == MVT::v2i16) ? MVT::f32
2263 : (type == MVT::v2f32) ? MVT::f32
2264 : type;
2265
2266 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2267 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2268}
2269
2270bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2271 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2272}
2273
2274bool AMDGPUOperand::isVRegWithInputMods() const {
2275 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2276 // GFX90A allows DPP on 64-bit operands.
2277 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2278 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2279}
2280
2281template <bool IsFake16>
2282bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2283 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2284 : AMDGPU::VGPR_16_Lo128RegClassID);
2285}
2286
2287template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2288 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2289 : AMDGPU::VGPR_16RegClassID);
2290}
2291
2292bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2293 if (AsmParser->isVI())
2294 return isVReg32();
2295 if (AsmParser->isGFX9Plus())
2296 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2297 return false;
2298}
2299
2300bool AMDGPUOperand::isSDWAFP16Operand() const {
2301 return isSDWAOperand(MVT::f16);
2302}
2303
2304bool AMDGPUOperand::isSDWAFP32Operand() const {
2305 return isSDWAOperand(MVT::f32);
2306}
2307
2308bool AMDGPUOperand::isSDWAInt16Operand() const {
2309 return isSDWAOperand(MVT::i16);
2310}
2311
2312bool AMDGPUOperand::isSDWAInt32Operand() const {
2313 return isSDWAOperand(MVT::i32);
2314}
2315
2316bool AMDGPUOperand::isBoolReg() const {
2317 auto FB = AsmParser->getFeatureBits();
2318 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2319 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2320}
2321
2322uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2323{
2324 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2325 assert(Size == 2 || Size == 4 || Size == 8);
2326
2327 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2328
2329 if (Imm.Mods.Abs) {
2330 Val &= ~FpSignMask;
2331 }
2332 if (Imm.Mods.Neg) {
2333 Val ^= FpSignMask;
2334 }
2335
2336 return Val;
2337}
2338
2339void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2340 if (isExpr()) {
2342 return;
2343 }
2344
2345 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2346 Inst.getNumOperands())) {
2347 addLiteralImmOperand(Inst, Imm.Val,
2348 ApplyModifiers &
2349 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2350 } else {
2351 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2353 setImmKindNone();
2354 }
2355}
2356
2357void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2358 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2359 auto OpNum = Inst.getNumOperands();
2360 // Check that this operand accepts literals
2361 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2362
2363 if (ApplyModifiers) {
2364 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2365 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2366 Val = applyInputFPModifiers(Val, Size);
2367 }
2368
2369 APInt Literal(64, Val);
2370 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2371
2372 if (Imm.IsFPImm) { // We got fp literal token
2373 switch (OpTy) {
2379 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2380 AsmParser->hasInv2PiInlineImm())) {
2381 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2382 setImmKindConst();
2383 return;
2384 }
2385
2386 // Non-inlineable
2387 if (AMDGPU::isSISrcFPOperand(InstDesc,
2388 OpNum)) { // Expected 64-bit fp operand
2389 bool HasMandatoryLiteral =
2390 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2391 // For fp operands we check if low 32 bits are zeros
2392 if (Literal.getLoBits(32) != 0 &&
2393 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2394 !HasMandatoryLiteral) {
2395 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2396 Inst.getLoc(),
2397 "Can't encode literal as exact 64-bit floating-point operand. "
2398 "Low 32-bits will be set to zero");
2399 Val &= 0xffffffff00000000u;
2400 }
2401
2403 setImmKindLiteral();
2404 return;
2405 }
2406
2407 // We don't allow fp literals in 64-bit integer instructions. It is
2408 // unclear how we should encode them. This case should be checked earlier
2409 // in predicate methods (isLiteralImm())
2410 llvm_unreachable("fp literal in 64-bit integer instruction.");
2411
2414 setImmKindMandatoryLiteral();
2415 return;
2416
2421 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2422 // This is 1/(2*pi), which is going to be truncated to bf16 with a
2423 // loss of precision. The constant represents the idiomatic fp32 value of
2424 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits
2425 // cleared. Prevent the rounding below.
2426 Inst.addOperand(MCOperand::createImm(0x3e22));
2427 setImmKindLiteral();
2428 return;
2429 }
2430 [[fallthrough]];
2431
2452 bool lost;
2453 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2454 // Convert literal to single precision
2455 FPLiteral.convert(*getOpFltSemantics(OpTy),
2456 APFloat::rmNearestTiesToEven, &lost);
2457 // We allow precision loss but not overflow or underflow. This should be
2458 // checked earlier in isLiteralImm()
2459
2460 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2461 Inst.addOperand(MCOperand::createImm(ImmVal));
2462 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2463 setImmKindMandatoryLiteral();
2464 } else {
2465 setImmKindLiteral();
2466 }
2467 return;
2468 }
2469 default:
2470 llvm_unreachable("invalid operand size");
2471 }
2472
2473 return;
2474 }
2475
2476 // We got int literal token.
2477 // Only sign extend inline immediates.
2478 switch (OpTy) {
2491 if (isSafeTruncation(Val, 32) &&
2492 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2493 AsmParser->hasInv2PiInlineImm())) {
2495 setImmKindConst();
2496 return;
2497 }
2498 [[fallthrough]];
2499
2501
2503 setImmKindLiteral();
2504 return;
2505
2508 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2510 setImmKindConst();
2511 return;
2512 }
2513
2514 // When the 32 MSBs are not zero (which effectively means the value can't be
2515 // safely truncated to uint32_t), if the target doesn't support 64-bit literals, or
2516 // the lit modifier is explicitly used, we need to truncate the value to the 32
2517 // LSBs.
2518 if (!AsmParser->has64BitLiterals() || getModifiers().Lit)
2519 Val = Lo_32(Val);
2520
2522 setImmKindLiteral();
2523 return;
2524
2528 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2530 setImmKindConst();
2531 return;
2532 }
2533
2534 // If the target doesn't support 64-bit literals, the constant has to be
2535 // placed in the high 32 bits of a double-precision floating-point value.
2536 if (!AsmParser->has64BitLiterals()) {
2537 Val = static_cast<uint64_t>(Val) << 32;
2538 } else {
2539 // When the target does support 64-bit literals, there are still two cases
2540 // where we want to use the src_literal encoding:
2541 // 1) it is explicitly forced by using the lit modifier;
2542 // 2) the value has a valid 32-bit representation (signed or unsigned)
2543 //    and is not forced by the lit64 modifier.
2544 if (getModifiers().Lit ||
2545 (!getModifiers().Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2546 Val = static_cast<uint64_t>(Val) << 32;
2547 }
2548
2550 setImmKindLiteral();
2551 return;
2552
2555 if (isSafeTruncation(Val, 16) &&
2556 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2558 setImmKindConst();
2559 return;
2560 }
2561
2562 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2563 setImmKindLiteral();
2564 return;
2565
2568 if (isSafeTruncation(Val, 16) &&
2569 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2570 AsmParser->hasInv2PiInlineImm())) {
2572 setImmKindConst();
2573 return;
2574 }
2575
2576 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2577 setImmKindLiteral();
2578 return;
2579
2582 if (isSafeTruncation(Val, 16) &&
2583 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2584 AsmParser->hasInv2PiInlineImm())) {
2586 setImmKindConst();
2587 return;
2588 }
2589
2590 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2591 setImmKindLiteral();
2592 return;
2593
2595 assert(isSafeTruncation(Val, 16));
2596 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2598 return;
2599 }
2601 assert(isSafeTruncation(Val, 16));
2602 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2603 AsmParser->hasInv2PiInlineImm()));
2604
2606 return;
2607 }
2608
2610 assert(isSafeTruncation(Val, 16));
2611 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2612 AsmParser->hasInv2PiInlineImm()));
2613
2615 return;
2616 }
2617
2619 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2620 setImmKindMandatoryLiteral();
2621 return;
2623 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2624 setImmKindMandatoryLiteral();
2625 return;
2627 if ((isInt<32>(Val) || isUInt<32>(Val)) && !getModifiers().Lit64)
2628 Val <<= 32;
2629
2631 setImmKindMandatoryLiteral();
2632 return;
2633 default:
2634 llvm_unreachable("invalid operand size");
2635 }
2636}
2637
2638void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2639 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2640}
2641
2642bool AMDGPUOperand::isInlineValue() const {
2643 return isRegKind() && ::isInlineValue(getReg());
2644}
2645
2646//===----------------------------------------------------------------------===//
2647// AsmParser
2648//===----------------------------------------------------------------------===//
2649
2650void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2651 // TODO: make these pre-defined variables read-only.
2652 // Currently there is no suitable machinery in the core llvm-mc for this.
2653 // MCSymbol::isRedefinable is intended for another purpose, and
2654 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2655 MCContext &Ctx = getContext();
2656 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2658}
2659
2660static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2661 if (Is == IS_VGPR) {
2662 switch (RegWidth) {
2663 default: return -1;
2664 case 32:
2665 return AMDGPU::VGPR_32RegClassID;
2666 case 64:
2667 return AMDGPU::VReg_64RegClassID;
2668 case 96:
2669 return AMDGPU::VReg_96RegClassID;
2670 case 128:
2671 return AMDGPU::VReg_128RegClassID;
2672 case 160:
2673 return AMDGPU::VReg_160RegClassID;
2674 case 192:
2675 return AMDGPU::VReg_192RegClassID;
2676 case 224:
2677 return AMDGPU::VReg_224RegClassID;
2678 case 256:
2679 return AMDGPU::VReg_256RegClassID;
2680 case 288:
2681 return AMDGPU::VReg_288RegClassID;
2682 case 320:
2683 return AMDGPU::VReg_320RegClassID;
2684 case 352:
2685 return AMDGPU::VReg_352RegClassID;
2686 case 384:
2687 return AMDGPU::VReg_384RegClassID;
2688 case 512:
2689 return AMDGPU::VReg_512RegClassID;
2690 case 1024:
2691 return AMDGPU::VReg_1024RegClassID;
2692 }
2693 } else if (Is == IS_TTMP) {
2694 switch (RegWidth) {
2695 default: return -1;
2696 case 32:
2697 return AMDGPU::TTMP_32RegClassID;
2698 case 64:
2699 return AMDGPU::TTMP_64RegClassID;
2700 case 128:
2701 return AMDGPU::TTMP_128RegClassID;
2702 case 256:
2703 return AMDGPU::TTMP_256RegClassID;
2704 case 512:
2705 return AMDGPU::TTMP_512RegClassID;
2706 }
2707 } else if (Is == IS_SGPR) {
2708 switch (RegWidth) {
2709 default: return -1;
2710 case 32:
2711 return AMDGPU::SGPR_32RegClassID;
2712 case 64:
2713 return AMDGPU::SGPR_64RegClassID;
2714 case 96:
2715 return AMDGPU::SGPR_96RegClassID;
2716 case 128:
2717 return AMDGPU::SGPR_128RegClassID;
2718 case 160:
2719 return AMDGPU::SGPR_160RegClassID;
2720 case 192:
2721 return AMDGPU::SGPR_192RegClassID;
2722 case 224:
2723 return AMDGPU::SGPR_224RegClassID;
2724 case 256:
2725 return AMDGPU::SGPR_256RegClassID;
2726 case 288:
2727 return AMDGPU::SGPR_288RegClassID;
2728 case 320:
2729 return AMDGPU::SGPR_320RegClassID;
2730 case 352:
2731 return AMDGPU::SGPR_352RegClassID;
2732 case 384:
2733 return AMDGPU::SGPR_384RegClassID;
2734 case 512:
2735 return AMDGPU::SGPR_512RegClassID;
2736 }
2737 } else if (Is == IS_AGPR) {
2738 switch (RegWidth) {
2739 default: return -1;
2740 case 32:
2741 return AMDGPU::AGPR_32RegClassID;
2742 case 64:
2743 return AMDGPU::AReg_64RegClassID;
2744 case 96:
2745 return AMDGPU::AReg_96RegClassID;
2746 case 128:
2747 return AMDGPU::AReg_128RegClassID;
2748 case 160:
2749 return AMDGPU::AReg_160RegClassID;
2750 case 192:
2751 return AMDGPU::AReg_192RegClassID;
2752 case 224:
2753 return AMDGPU::AReg_224RegClassID;
2754 case 256:
2755 return AMDGPU::AReg_256RegClassID;
2756 case 288:
2757 return AMDGPU::AReg_288RegClassID;
2758 case 320:
2759 return AMDGPU::AReg_320RegClassID;
2760 case 352:
2761 return AMDGPU::AReg_352RegClassID;
2762 case 384:
2763 return AMDGPU::AReg_384RegClassID;
2764 case 512:
2765 return AMDGPU::AReg_512RegClassID;
2766 case 1024:
2767 return AMDGPU::AReg_1024RegClassID;
2768 }
2769 }
2770 return -1;
2771}
2772
2775 .Case("exec", AMDGPU::EXEC)
2776 .Case("vcc", AMDGPU::VCC)
2777 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2778 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2779 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2780 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2781 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2782 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2783 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2784 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2785 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2786 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2787 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2788 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2789 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2790 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2791 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2792 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2793 .Case("m0", AMDGPU::M0)
2794 .Case("vccz", AMDGPU::SRC_VCCZ)
2795 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2796 .Case("execz", AMDGPU::SRC_EXECZ)
2797 .Case("src_execz", AMDGPU::SRC_EXECZ)
2798 .Case("scc", AMDGPU::SRC_SCC)
2799 .Case("src_scc", AMDGPU::SRC_SCC)
2800 .Case("tba", AMDGPU::TBA)
2801 .Case("tma", AMDGPU::TMA)
2802 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2803 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2804 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2805 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2806 .Case("vcc_lo", AMDGPU::VCC_LO)
2807 .Case("vcc_hi", AMDGPU::VCC_HI)
2808 .Case("exec_lo", AMDGPU::EXEC_LO)
2809 .Case("exec_hi", AMDGPU::EXEC_HI)
2810 .Case("tma_lo", AMDGPU::TMA_LO)
2811 .Case("tma_hi", AMDGPU::TMA_HI)
2812 .Case("tba_lo", AMDGPU::TBA_LO)
2813 .Case("tba_hi", AMDGPU::TBA_HI)
2814 .Case("pc", AMDGPU::PC_REG)
2815 .Case("null", AMDGPU::SGPR_NULL)
2816 .Default(AMDGPU::NoRegister);
2817}
2818
2819bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2820 SMLoc &EndLoc, bool RestoreOnFailure) {
2821 auto R = parseRegister();
2822 if (!R) return true;
2823 assert(R->isReg());
2824 RegNo = R->getReg();
2825 StartLoc = R->getStartLoc();
2826 EndLoc = R->getEndLoc();
2827 return false;
2828}
2829
2830bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2831 SMLoc &EndLoc) {
2832 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2833}
2834
2835ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2836 SMLoc &EndLoc) {
2837 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2838 bool PendingErrors = getParser().hasPendingError();
2839 getParser().clearPendingErrors();
2840 if (PendingErrors)
2841 return ParseStatus::Failure;
2842 if (Result)
2843 return ParseStatus::NoMatch;
2844 return ParseStatus::Success;
2845}
2846
2847bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2848 RegisterKind RegKind,
2849 MCRegister Reg1, SMLoc Loc) {
2850 switch (RegKind) {
2851 case IS_SPECIAL:
2852 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2853 Reg = AMDGPU::EXEC;
2854 RegWidth = 64;
2855 return true;
2856 }
2857 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2858 Reg = AMDGPU::FLAT_SCR;
2859 RegWidth = 64;
2860 return true;
2861 }
2862 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2863 Reg = AMDGPU::XNACK_MASK;
2864 RegWidth = 64;
2865 return true;
2866 }
2867 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2868 Reg = AMDGPU::VCC;
2869 RegWidth = 64;
2870 return true;
2871 }
2872 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2873 Reg = AMDGPU::TBA;
2874 RegWidth = 64;
2875 return true;
2876 }
2877 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2878 Reg = AMDGPU::TMA;
2879 RegWidth = 64;
2880 return true;
2881 }
2882 Error(Loc, "register does not fit in the list");
2883 return false;
2884 case IS_VGPR:
2885 case IS_SGPR:
2886 case IS_AGPR:
2887 case IS_TTMP:
2888 if (Reg1 != Reg + RegWidth / 32) {
2889 Error(Loc, "registers in a list must have consecutive indices");
2890 return false;
2891 }
2892 RegWidth += 32;
2893 return true;
2894 default:
2895 llvm_unreachable("unexpected register kind");
2896 }
2897}
2898
2899struct RegInfo {
2900   StringLiteral Name;
2901   RegisterKind Kind;
2902};
2903
2904static constexpr RegInfo RegularRegisters[] = {
2905 {{"v"}, IS_VGPR},
2906 {{"s"}, IS_SGPR},
2907 {{"ttmp"}, IS_TTMP},
2908 {{"acc"}, IS_AGPR},
2909 {{"a"}, IS_AGPR},
2910};
2911
2912static bool isRegularReg(RegisterKind Kind) {
2913 return Kind == IS_VGPR ||
2914 Kind == IS_SGPR ||
2915 Kind == IS_TTMP ||
2916 Kind == IS_AGPR;
2917}
2918
2919 static const RegInfo *getRegularRegInfo(StringRef Str) {
2920   for (const RegInfo &Reg : RegularRegisters)
2921 if (Str.starts_with(Reg.Name))
2922 return &Reg;
2923 return nullptr;
2924}
2925
2926static bool getRegNum(StringRef Str, unsigned& Num) {
2927 return !Str.getAsInteger(10, Num);
2928}
2929
2930bool
2931AMDGPUAsmParser::isRegister(const AsmToken &Token,
2932 const AsmToken &NextToken) const {
2933
2934 // A list of consecutive registers: [s0,s1,s2,s3]
2935 if (Token.is(AsmToken::LBrac))
2936 return true;
2937
2938 if (!Token.is(AsmToken::Identifier))
2939 return false;
2940
2941 // A single register like s0 or a range of registers like s[0:1]
2942
2943 StringRef Str = Token.getString();
2944 const RegInfo *Reg = getRegularRegInfo(Str);
2945 if (Reg) {
2946 StringRef RegName = Reg->Name;
2947 StringRef RegSuffix = Str.substr(RegName.size());
2948 if (!RegSuffix.empty()) {
2949 RegSuffix.consume_back(".l");
2950 RegSuffix.consume_back(".h");
2951 unsigned Num;
2952 // A single register with an index: rXX
2953 if (getRegNum(RegSuffix, Num))
2954 return true;
2955 } else {
2956 // A range of registers: r[XX:YY].
2957 if (NextToken.is(AsmToken::LBrac))
2958 return true;
2959 }
2960 }
2961
2962 return getSpecialRegForName(Str).isValid();
2963}
2964
2965bool
2966AMDGPUAsmParser::isRegister()
2967{
2968 return isRegister(getToken(), peekToken());
2969}
2970
2971MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2972 unsigned SubReg, unsigned RegWidth,
2973 SMLoc Loc) {
2974 assert(isRegularReg(RegKind));
2975
2976 unsigned AlignSize = 1;
2977 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2978 // SGPR and TTMP registers must be aligned.
2979 // Max required alignment is 4 dwords.
2980 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2981 }
2982
2983 if (RegNum % AlignSize != 0) {
2984 Error(Loc, "invalid register alignment");
2985 return MCRegister();
2986 }
2987
2988 unsigned RegIdx = RegNum / AlignSize;
2989 int RCID = getRegClass(RegKind, RegWidth);
2990 if (RCID == -1) {
2991 Error(Loc, "invalid or unsupported register size");
2992 return MCRegister();
2993 }
2994
2995 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2996 const MCRegisterClass RC = TRI->getRegClass(RCID);
2997 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2998 Error(Loc, "register index is out of range");
2999 return AMDGPU::NoRegister;
3000 }
3001
3002 if (RegKind == IS_VGPR && !isGFX1250() && RegIdx + RegWidth / 32 > 256) {
3003 Error(Loc, "register index is out of range");
3004 return MCRegister();
3005 }
3006
3007 MCRegister Reg = RC.getRegister(RegIdx);
3008
3009 if (SubReg) {
3010 Reg = TRI->getSubReg(Reg, SubReg);
3011
3012 // Currently all regular registers have their .l and .h subregisters, so
3013 // we should never need to generate an error here.
3014 assert(Reg && "Invalid subregister!");
3015 }
3016
3017 return Reg;
3018}
3019
3020bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
3021 unsigned &SubReg) {
3022 int64_t RegLo, RegHi;
3023 if (!skipToken(AsmToken::LBrac, "missing register index"))
3024 return false;
3025
3026 SMLoc FirstIdxLoc = getLoc();
3027 SMLoc SecondIdxLoc;
3028
3029 if (!parseExpr(RegLo))
3030 return false;
3031
3032 if (trySkipToken(AsmToken::Colon)) {
3033 SecondIdxLoc = getLoc();
3034 if (!parseExpr(RegHi))
3035 return false;
3036 } else {
3037 RegHi = RegLo;
3038 }
3039
3040 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
3041 return false;
3042
3043 if (!isUInt<32>(RegLo)) {
3044 Error(FirstIdxLoc, "invalid register index");
3045 return false;
3046 }
3047
3048 if (!isUInt<32>(RegHi)) {
3049 Error(SecondIdxLoc, "invalid register index");
3050 return false;
3051 }
3052
3053 if (RegLo > RegHi) {
3054 Error(FirstIdxLoc, "first register index should not exceed second index");
3055 return false;
3056 }
3057
3058 if (RegHi == RegLo) {
3059 StringRef RegSuffix = getTokenStr();
3060 if (RegSuffix == ".l") {
3061 SubReg = AMDGPU::lo16;
3062 lex();
3063 } else if (RegSuffix == ".h") {
3064 SubReg = AMDGPU::hi16;
3065 lex();
3066 }
3067 }
3068
3069 Num = static_cast<unsigned>(RegLo);
3070 RegWidth = 32 * ((RegHi - RegLo) + 1);
3071
3072 return true;
3073}
3074
3075MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3076 unsigned &RegNum,
3077 unsigned &RegWidth,
3078 SmallVectorImpl<AsmToken> &Tokens) {
3079 assert(isToken(AsmToken::Identifier));
3080 MCRegister Reg = getSpecialRegForName(getTokenStr());
3081 if (Reg) {
3082 RegNum = 0;
3083 RegWidth = 32;
3084 RegKind = IS_SPECIAL;
3085 Tokens.push_back(getToken());
3086 lex(); // skip register name
3087 }
3088 return Reg;
3089}
3090
3091MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3092 unsigned &RegNum,
3093 unsigned &RegWidth,
3094 SmallVectorImpl<AsmToken> &Tokens) {
3095 assert(isToken(AsmToken::Identifier));
3096 StringRef RegName = getTokenStr();
3097 auto Loc = getLoc();
3098
3099 const RegInfo *RI = getRegularRegInfo(RegName);
3100 if (!RI) {
3101 Error(Loc, "invalid register name");
3102 return MCRegister();
3103 }
3104
3105 Tokens.push_back(getToken());
3106 lex(); // skip register name
3107
3108 RegKind = RI->Kind;
3109 StringRef RegSuffix = RegName.substr(RI->Name.size());
3110 unsigned SubReg = NoSubRegister;
3111 if (!RegSuffix.empty()) {
3112 if (RegSuffix.consume_back(".l"))
3113 SubReg = AMDGPU::lo16;
3114 else if (RegSuffix.consume_back(".h"))
3115 SubReg = AMDGPU::hi16;
3116
3117 // Single 32-bit register: vXX.
3118 if (!getRegNum(RegSuffix, RegNum)) {
3119 Error(Loc, "invalid register index");
3120 return MCRegister();
3121 }
3122 RegWidth = 32;
3123 } else {
3124 // Range of registers: v[XX:YY]. ":YY" is optional.
3125 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3126 return MCRegister();
3127 }
3128
3129 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3130}
3131
3132MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3133 unsigned &RegNum, unsigned &RegWidth,
3134 SmallVectorImpl<AsmToken> &Tokens) {
3135 MCRegister Reg;
3136 auto ListLoc = getLoc();
3137
3138 if (!skipToken(AsmToken::LBrac,
3139 "expected a register or a list of registers")) {
3140 return MCRegister();
3141 }
3142
3143 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3144
3145 auto Loc = getLoc();
3146 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3147 return MCRegister();
3148 if (RegWidth != 32) {
3149 Error(Loc, "expected a single 32-bit register");
3150 return MCRegister();
3151 }
3152
3153 for (; trySkipToken(AsmToken::Comma); ) {
3154 RegisterKind NextRegKind;
3155 MCRegister NextReg;
3156 unsigned NextRegNum, NextRegWidth;
3157 Loc = getLoc();
3158
3159 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3160 NextRegNum, NextRegWidth,
3161 Tokens)) {
3162 return MCRegister();
3163 }
3164 if (NextRegWidth != 32) {
3165 Error(Loc, "expected a single 32-bit register");
3166 return MCRegister();
3167 }
3168 if (NextRegKind != RegKind) {
3169 Error(Loc, "registers in a list must be of the same kind");
3170 return MCRegister();
3171 }
3172 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3173 return MCRegister();
3174 }
3175
3176 if (!skipToken(AsmToken::RBrac,
3177 "expected a comma or a closing square bracket")) {
3178 return MCRegister();
3179 }
3180
3181 if (isRegularReg(RegKind))
3182 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3183
3184 return Reg;
3185}
3186
3187bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3188 MCRegister &Reg, unsigned &RegNum,
3189 unsigned &RegWidth,
3190 SmallVectorImpl<AsmToken> &Tokens) {
3191 auto Loc = getLoc();
3192 Reg = MCRegister();
3193
3194 if (isToken(AsmToken::Identifier)) {
3195 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3196 if (!Reg)
3197 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3198 } else {
3199 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3200 }
3201
3202 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3203 if (!Reg) {
3204 assert(Parser.hasPendingError());
3205 return false;
3206 }
3207
3208 if (!subtargetHasRegister(*TRI, Reg)) {
3209 if (Reg == AMDGPU::SGPR_NULL) {
3210 Error(Loc, "'null' operand is not supported on this GPU");
3211 } else {
3213 " register not available on this GPU");
3214 }
3215 return false;
3216 }
3217
3218 return true;
3219}
3220
3221bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3222 MCRegister &Reg, unsigned &RegNum,
3223 unsigned &RegWidth,
3224 bool RestoreOnFailure /*=false*/) {
3225 Reg = MCRegister();
3226
3228 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3229 if (RestoreOnFailure) {
3230 while (!Tokens.empty()) {
3231 getLexer().UnLex(Tokens.pop_back_val());
3232 }
3233 }
3234 return true;
3235 }
3236 return false;
3237}
3238
3239std::optional<StringRef>
3240AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3241 switch (RegKind) {
3242 case IS_VGPR:
3243 return StringRef(".amdgcn.next_free_vgpr");
3244 case IS_SGPR:
3245 return StringRef(".amdgcn.next_free_sgpr");
3246 default:
3247 return std::nullopt;
3248 }
3249}
3250
3251void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3252 auto SymbolName = getGprCountSymbolName(RegKind);
3253 assert(SymbolName && "initializing invalid register kind");
3254 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3256 Sym->setRedefinable(true);
3257}
3258
3259bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3260 unsigned DwordRegIndex,
3261 unsigned RegWidth) {
3262 // Symbols are only defined for GCN targets
3263 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3264 return true;
3265
3266 auto SymbolName = getGprCountSymbolName(RegKind);
3267 if (!SymbolName)
3268 return true;
3269 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3270
3271 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3272 int64_t OldCount;
3273
3274 if (!Sym->isVariable())
3275 return !Error(getLoc(),
3276 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3277 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3278 return !Error(
3279 getLoc(),
3280 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3281
3282   if (OldCount <= NewMax)
3283     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3284
3285 return true;
3286}
3287
3288std::unique_ptr<AMDGPUOperand>
3289AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3290 const auto &Tok = getToken();
3291 SMLoc StartLoc = Tok.getLoc();
3292 SMLoc EndLoc = Tok.getEndLoc();
3293 RegisterKind RegKind;
3294 MCRegister Reg;
3295 unsigned RegNum, RegWidth;
3296
3297 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3298 return nullptr;
3299 }
3300 if (isHsaAbi(getSTI())) {
3301 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3302 return nullptr;
3303 } else
3304 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3305 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3306}
3307
3308ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3309 bool HasSP3AbsModifier, bool HasLit,
3310 bool HasLit64) {
3311 // TODO: add syntactic sugar for 1/(2*PI)
3312
3313 if (isRegister() || isModifier())
3314 return ParseStatus::NoMatch;
3315
3316 if (!HasLit && !HasLit64) {
3317 HasLit64 = trySkipId("lit64");
3318 HasLit = !HasLit64 && trySkipId("lit");
3319 if (HasLit || HasLit64) {
3320 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3321 return ParseStatus::Failure;
3322 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit, HasLit64);
3323 if (S.isSuccess() &&
3324 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3325 return ParseStatus::Failure;
3326 return S;
3327 }
3328 }
3329
3330 const auto& Tok = getToken();
3331 const auto& NextTok = peekToken();
3332 bool IsReal = Tok.is(AsmToken::Real);
3333 SMLoc S = getLoc();
3334 bool Negate = false;
3335
3336 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3337 lex();
3338 IsReal = true;
3339 Negate = true;
3340 }
3341
3342 AMDGPUOperand::Modifiers Mods;
3343 Mods.Lit = HasLit;
3344 Mods.Lit64 = HasLit64;
3345
3346 if (IsReal) {
3347 // Floating-point expressions are not supported.
3348 // Can only allow floating-point literals with an
3349 // optional sign.
3350
3351 StringRef Num = getTokenStr();
3352 lex();
3353
3354 APFloat RealVal(APFloat::IEEEdouble());
3355 auto roundMode = APFloat::rmNearestTiesToEven;
3356 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3357 return ParseStatus::Failure;
3358 if (Negate)
3359 RealVal.changeSign();
3360
3361 Operands.push_back(
3362 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3363 AMDGPUOperand::ImmTyNone, true));
3364 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3365 Op.setModifiers(Mods);
3366
3367 return ParseStatus::Success;
3368
3369 } else {
3370 int64_t IntVal;
3371 const MCExpr *Expr;
3372 SMLoc S = getLoc();
3373
3374 if (HasSP3AbsModifier) {
3375 // This is a workaround for handling expressions
3376 // as arguments of SP3 'abs' modifier, for example:
3377 // |1.0|
3378 // |-1|
3379 // |1+x|
3380 // This syntax is not compatible with syntax of standard
3381 // MC expressions (due to the trailing '|').
3382 SMLoc EndLoc;
3383 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3384 return ParseStatus::Failure;
3385 } else {
3386 if (Parser.parseExpression(Expr))
3387 return ParseStatus::Failure;
3388 }
3389
3390 if (Expr->evaluateAsAbsolute(IntVal)) {
3391 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3392 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3393 Op.setModifiers(Mods);
3394 } else {
3395 if (HasLit || HasLit64)
3396 return ParseStatus::NoMatch;
3397 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3398 }
3399
3400 return ParseStatus::Success;
3401 }
3402
3403 return ParseStatus::NoMatch;
3404}
3405
3406ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3407 if (!isRegister())
3408 return ParseStatus::NoMatch;
3409
3410 if (auto R = parseRegister()) {
3411 assert(R->isReg());
3412 Operands.push_back(std::move(R));
3413 return ParseStatus::Success;
3414 }
3415 return ParseStatus::Failure;
3416}
3417
3418ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3419 bool HasSP3AbsMod, bool HasLit,
3420 bool HasLit64) {
3421 ParseStatus Res = parseReg(Operands);
3422 if (!Res.isNoMatch())
3423 return Res;
3424 if (isModifier())
3425 return ParseStatus::NoMatch;
3426 return parseImm(Operands, HasSP3AbsMod, HasLit, HasLit64);
3427}
3428
3429bool
3430AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3431 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3432 const auto &str = Token.getString();
3433 return str == "abs" || str == "neg" || str == "sext";
3434 }
3435 return false;
3436}
3437
3438bool
3439AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3440 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3441}
3442
3443bool
3444AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3445 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3446}
3447
3448bool
3449AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3450 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3451}
3452
3453// Check if this is an operand modifier or an opcode modifier
3454 // which may look like an expression but is not. We should
3455// avoid parsing these modifiers as expressions. Currently
3456// recognized sequences are:
3457// |...|
3458// abs(...)
3459// neg(...)
3460// sext(...)
3461// -reg
3462// -|...|
3463// -abs(...)
3464// name:...
3465//
3466bool
3467AMDGPUAsmParser::isModifier() {
3468
3469 AsmToken Tok = getToken();
3470 AsmToken NextToken[2];
3471 peekTokens(NextToken);
3472
3473 return isOperandModifier(Tok, NextToken[0]) ||
3474 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3475 isOpcodeModifierWithVal(Tok, NextToken[0]);
3476}
3477
3478// Check if the current token is an SP3 'neg' modifier.
3479// Currently this modifier is allowed in the following context:
3480//
3481// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3482// 2. Before an 'abs' modifier: -abs(...)
3483// 3. Before an SP3 'abs' modifier: -|...|
3484//
3485// In all other cases "-" is handled as a part
3486// of an expression that follows the sign.
3487//
3488 // Note: When "-" is followed by an integer literal N,
3489 // this is interpreted as integer negation rather
3490 // than a floating-point NEG modifier applied to N.
3491 // Besides being counter-intuitive, such use of the floating-point
3492 // NEG modifier would have resulted in integer literals having
3493 // different meanings with VOP1/2/C and VOP3,
3494// for example:
3495// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3496// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3497// Negative fp literals with preceding "-" are
3498// handled likewise for uniformity
3499//
3500bool
3501AMDGPUAsmParser::parseSP3NegModifier() {
3502
3503 AsmToken NextToken[2];
3504 peekTokens(NextToken);
3505
3506 if (isToken(AsmToken::Minus) &&
3507 (isRegister(NextToken[0], NextToken[1]) ||
3508 NextToken[0].is(AsmToken::Pipe) ||
3509 isId(NextToken[0], "abs"))) {
3510 lex();
3511 return true;
3512 }
3513
3514 return false;
3515}
3516
3517ParseStatus
3518AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3519 bool AllowImm) {
3520 bool Neg, SP3Neg;
3521 bool Abs, SP3Abs;
3522 bool Lit64, Lit;
3523 SMLoc Loc;
3524
3525 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3526 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3527 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3528
3529 SP3Neg = parseSP3NegModifier();
3530
3531 Loc = getLoc();
3532 Neg = trySkipId("neg");
3533 if (Neg && SP3Neg)
3534 return Error(Loc, "expected register or immediate");
3535 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3536 return ParseStatus::Failure;
3537
3538 Abs = trySkipId("abs");
3539 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3540 return ParseStatus::Failure;
3541
3542 Lit64 = trySkipId("lit64");
3543 if (Lit64) {
3544 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3545 return ParseStatus::Failure;
3546 if (!has64BitLiterals())
3547 return Error(Loc, "lit64 is not supported on this GPU");
3548 }
3549
3550 Lit = !Lit64 && trySkipId("lit");
3551 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3552 return ParseStatus::Failure;
3553
3554 Loc = getLoc();
3555 SP3Abs = trySkipToken(AsmToken::Pipe);
3556 if (Abs && SP3Abs)
3557 return Error(Loc, "expected register or immediate");
3558
3559 ParseStatus Res;
3560 if (AllowImm) {
3561 Res = parseRegOrImm(Operands, SP3Abs, Lit, Lit64);
3562 } else {
3563 Res = parseReg(Operands);
3564 }
3565 if (!Res.isSuccess())
3566     return (SP3Neg || Neg || SP3Abs || Abs || Lit || Lit64)
3567                ? ParseStatus::Failure
3568                : Res;
3569
3570 if ((Lit || Lit64) && !Operands.back()->isImm())
3571 Error(Loc, "expected immediate with lit modifier");
3572
3573 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3574 return ParseStatus::Failure;
3575 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3576 return ParseStatus::Failure;
3577 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3578 return ParseStatus::Failure;
3579 if ((Lit || Lit64) &&
3580 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3581 return ParseStatus::Failure;
3582
3583 AMDGPUOperand::Modifiers Mods;
3584 Mods.Abs = Abs || SP3Abs;
3585 Mods.Neg = Neg || SP3Neg;
3586 Mods.Lit = Lit;
3587 Mods.Lit64 = Lit64;
3588
3589 if (Mods.hasFPModifiers() || Lit || Lit64) {
3590 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3591 if (Op.isExpr())
3592 return Error(Op.getStartLoc(), "expected an absolute expression");
3593 Op.setModifiers(Mods);
3594 }
3595 return ParseStatus::Success;
3596}
3597
3598ParseStatus
3599AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3600 bool AllowImm) {
3601 bool Sext = trySkipId("sext");
3602 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3603 return ParseStatus::Failure;
3604
3605 ParseStatus Res;
3606 if (AllowImm) {
3607 Res = parseRegOrImm(Operands);
3608 } else {
3609 Res = parseReg(Operands);
3610 }
3611 if (!Res.isSuccess())
3612 return Sext ? ParseStatus::Failure : Res;
3613
3614 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3615 return ParseStatus::Failure;
3616
3617 AMDGPUOperand::Modifiers Mods;
3618 Mods.Sext = Sext;
3619
3620 if (Mods.hasIntModifiers()) {
3621 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3622 if (Op.isExpr())
3623 return Error(Op.getStartLoc(), "expected an absolute expression");
3624 Op.setModifiers(Mods);
3625 }
3626
3627 return ParseStatus::Success;
3628}
3629
3630ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3631 return parseRegOrImmWithFPInputMods(Operands, false);
3632}
3633
3634ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3635 return parseRegOrImmWithIntInputMods(Operands, false);
3636}
3637
3638ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3639 auto Loc = getLoc();
3640 if (trySkipId("off")) {
3641 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3642 AMDGPUOperand::ImmTyOff, false));
3643 return ParseStatus::Success;
3644 }
3645
3646 if (!isRegister())
3647 return ParseStatus::NoMatch;
3648
3649 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3650 if (Reg) {
3651 Operands.push_back(std::move(Reg));
3652 return ParseStatus::Success;
3653 }
3654
3655 return ParseStatus::Failure;
3656}
3657
3658unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3659 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3660
3661 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3662 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3663 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3664 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3665 return Match_InvalidOperand;
3666
3667 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3668 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3669 // v_mac_f32/16 allow only dst_sel == DWORD;
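// e.g. "dst_sel:BYTE_0" on v_mac_f16_sdwa is rejected via Match_InvalidOperand
// below; only dst_sel:DWORD is accepted for these opcodes.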
3670 auto OpNum =
3671 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3672 const auto &Op = Inst.getOperand(OpNum);
3673 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3674 return Match_InvalidOperand;
3675 }
3676 }
3677
3678 // Asm can first try to match VOPD or VOPD3. By failing early here with
3679 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3680 // Checking later during validateInstruction does not give a chance to retry
3681 // parsing as a different encoding.
3682 if (tryAnotherVOPDEncoding(Inst))
3683 return Match_InvalidOperand;
3684
3685 return Match_Success;
3686}
3687
3697
3698// What asm variants we should check
3699ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3700 if (isForcedDPP() && isForcedVOP3()) {
3701 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3702 return ArrayRef(Variants);
3703 }
3704 if (getForcedEncodingSize() == 32) {
3705 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3706 return ArrayRef(Variants);
3707 }
3708
3709 if (isForcedVOP3()) {
3710 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3711 return ArrayRef(Variants);
3712 }
3713
3714 if (isForcedSDWA()) {
3715 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3716 AMDGPUAsmVariants::SDWA9};
3717 return ArrayRef(Variants);
3718 }
3719
3720 if (isForcedDPP()) {
3721 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3722 return ArrayRef(Variants);
3723 }
3724
3725 return getAllVariants();
3726}
3727
3728StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3729 if (isForcedDPP() && isForcedVOP3())
3730 return "e64_dpp";
3731
3732 if (getForcedEncodingSize() == 32)
3733 return "e32";
3734
3735 if (isForcedVOP3())
3736 return "e64";
3737
3738 if (isForcedSDWA())
3739 return "sdwa";
3740
3741 if (isForcedDPP())
3742 return "dpp";
3743
3744 return "";
3745}
3746
3747unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3748 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3749 for (MCPhysReg Reg : Desc.implicit_uses()) {
3750 switch (Reg) {
3751 case AMDGPU::FLAT_SCR:
3752 case AMDGPU::VCC:
3753 case AMDGPU::VCC_LO:
3754 case AMDGPU::VCC_HI:
3755 case AMDGPU::M0:
3756 return Reg;
3757 default:
3758 break;
3759 }
3760 }
3761 return AMDGPU::NoRegister;
3762}
3763
3764// NB: This code is correct only when used to check constant
3765 // bus limitations because GFX7 supports no f16 inline constants.
3766// Note that there are no cases when a GFX7 opcode violates
3767// constant bus limitations due to the use of an f16 constant.
3768bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3769 unsigned OpIdx) const {
3770 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3771
3774 return false;
3775 }
3776
3777 const MCOperand &MO = Inst.getOperand(OpIdx);
3778
3779 int64_t Val = MO.getImm();
3780 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3781
3782 switch (OpSize) { // expected operand size
3783 case 8:
3784 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3785 case 4:
3786 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3787 case 2: {
3788 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3791 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3792
3796
3800
3804
3807 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3808
3811 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3812
3814 return false;
3815
3816 llvm_unreachable("invalid operand type");
3817 }
3818 default:
3819 llvm_unreachable("invalid operand size");
3820 }
3821}
3822
3823unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3824 if (!isGFX10Plus())
3825 return 1;
3826
3827 switch (Opcode) {
3828 // 64-bit shift instructions can use only one scalar value input
3829 case AMDGPU::V_LSHLREV_B64_e64:
3830 case AMDGPU::V_LSHLREV_B64_gfx10:
3831 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3832 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3833 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3834 case AMDGPU::V_LSHRREV_B64_e64:
3835 case AMDGPU::V_LSHRREV_B64_gfx10:
3836 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3837 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3838 case AMDGPU::V_ASHRREV_I64_e64:
3839 case AMDGPU::V_ASHRREV_I64_gfx10:
3840 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3841 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3842 case AMDGPU::V_LSHL_B64_e64:
3843 case AMDGPU::V_LSHR_B64_e64:
3844 case AMDGPU::V_ASHR_I64_e64:
3845 return 1;
3846 default:
3847 return 2;
3848 }
3849}
3850
3851 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3852 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3853
3854// Get regular operand indices in the same order as specified
3855// in the instruction (but append mandatory literals to the end).
3856 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3857 bool AddMandatoryLiterals = false) {
3858
3859 int16_t ImmIdx =
3860 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3861
3862 if (isVOPD(Opcode)) {
3863 int16_t ImmXIdx =
3864 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3865
3866 return {getNamedOperandIdx(Opcode, OpName::src0X),
3867 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3868 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3869 getNamedOperandIdx(Opcode, OpName::src0Y),
3870 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3871 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3872 ImmXIdx,
3873 ImmIdx};
3874 }
3875
3876 return {getNamedOperandIdx(Opcode, OpName::src0),
3877 getNamedOperandIdx(Opcode, OpName::src1),
3878 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3879}
3880
3881bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3882 const MCOperand &MO = Inst.getOperand(OpIdx);
3883 if (MO.isImm())
3884 return !isInlineConstant(Inst, OpIdx);
3885 if (MO.isReg()) {
3886 auto Reg = MO.getReg();
3887 if (!Reg)
3888 return false;
3889 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3890 auto PReg = mc2PseudoReg(Reg);
3891 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3892 }
3893 return true;
3894}
3895
3896// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3897// Writelane is special in that it can use SGPR and M0 (which would normally
3898// count as using the constant bus twice - but in this case it is allowed since
3899// the lane selector doesn't count as a use of the constant bus). However, it is
3900// still required to abide by the 1 SGPR rule.
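// e.g. "v_writelane_b32 v1, s2, m0" reads both s2 and m0, but only s2 counts
// toward the single allowed SGPR use.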
3901static bool checkWriteLane(const MCInst &Inst) {
3902 const unsigned Opcode = Inst.getOpcode();
3903 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3904 return false;
3905 const MCOperand &LaneSelOp = Inst.getOperand(2);
3906 if (!LaneSelOp.isReg())
3907 return false;
3908 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3909 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3910}
3911
3912bool AMDGPUAsmParser::validateConstantBusLimitations(
3913 const MCInst &Inst, const OperandVector &Operands) {
3914 const unsigned Opcode = Inst.getOpcode();
3915 const MCInstrDesc &Desc = MII.get(Opcode);
3916 MCRegister LastSGPR;
3917 unsigned ConstantBusUseCount = 0;
3918 unsigned NumLiterals = 0;
3919 unsigned LiteralSize;
3920
3921 if (!(Desc.TSFlags &
3924 !isVOPD(Opcode))
3925 return true;
3926
3927 if (checkWriteLane(Inst))
3928 return true;
3929
3930 // Check special imm operands (used by madmk, etc)
3931 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3932 ++NumLiterals;
3933 LiteralSize = 4;
3934 }
3935
3936 SmallDenseSet<unsigned> SGPRsUsed;
3937 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3938 if (SGPRUsed != AMDGPU::NoRegister) {
3939 SGPRsUsed.insert(SGPRUsed);
3940 ++ConstantBusUseCount;
3941 }
3942
3943 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3944
3945 for (int OpIdx : OpIndices) {
3946 if (OpIdx == -1)
3947 continue;
3948
3949 const MCOperand &MO = Inst.getOperand(OpIdx);
3950 if (usesConstantBus(Inst, OpIdx)) {
3951 if (MO.isReg()) {
3952 LastSGPR = mc2PseudoReg(MO.getReg());
3953 // Pairs of registers with partial intersections like these
3954 // s0, s[0:1]
3955 // flat_scratch_lo, flat_scratch
3956 // flat_scratch_lo, flat_scratch_hi
3957 // are theoretically valid but they are disabled anyway.
3958 // Note that this code mimics SIInstrInfo::verifyInstruction
3959 if (SGPRsUsed.insert(LastSGPR).second) {
3960 ++ConstantBusUseCount;
3961 }
3962 } else { // Expression or a literal
3963
3964 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3965 continue; // special operand like VINTERP attr_chan
3966
3967 // An instruction may use only one literal.
3968 // This has been validated on the previous step.
3969 // See validateVOPLiteral.
3970 // This literal may be used as more than one operand.
3971 // If all these operands are of the same size,
3972 // this literal counts as one scalar value.
3973 // Otherwise it counts as 2 scalar values.
3974 // See "GFX10 Shader Programming", section 3.6.2.3.
3975
3976 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3977 if (Size < 4)
3978 Size = 4;
3979
3980 if (NumLiterals == 0) {
3981 NumLiterals = 1;
3982 LiteralSize = Size;
3983 } else if (LiteralSize != Size) {
3984 NumLiterals = 2;
3985 }
3986 }
3987 }
3988 }
3989 ConstantBusUseCount += NumLiterals;
3990
3991 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3992 return true;
3993
3994 SMLoc LitLoc = getLitLoc(Operands);
3995 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3996 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3997 Error(Loc, "invalid operand (violates constant bus restrictions)");
3998 return false;
3999}
4000
4001std::optional<unsigned>
4002AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
4003
4004 const unsigned Opcode = Inst.getOpcode();
4005 if (!isVOPD(Opcode))
4006 return {};
4007
4008 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4009
4010 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
4011 const MCOperand &Opr = Inst.getOperand(OperandIdx);
4012 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
4013 ? Opr.getReg()
4014 : MCRegister();
4015 };
4016
4017 // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
4018 // source-cache.
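// Hence the SkipSrc flag below exempts those dual-MOV opcodes from part of the
// register bank check.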
4019 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
4020 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
4021 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
4022 bool AllowSameVGPR = isGFX1250();
4023
4024 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
4025 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
4026 int I = getNamedOperandIdx(Opcode, OpName);
4027 const MCOperand &Op = Inst.getOperand(I);
4028 if (!Op.isImm())
4029 continue;
4030 int64_t Imm = Op.getImm();
4031 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
4032 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
4033 return (unsigned)I;
4034 }
4035
4036 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
4037 OpName::vsrc2Y, OpName::imm}) {
4038 int I = getNamedOperandIdx(Opcode, OpName);
4039 if (I == -1)
4040 continue;
4041 const MCOperand &Op = Inst.getOperand(I);
4042 if (Op.isImm())
4043 return (unsigned)I;
4044 }
4045 }
4046
4047 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4048 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4049 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
4050
4051 return InvalidCompOprIdx;
4052}
4053
4054bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
4055 const OperandVector &Operands) {
4056
4057 unsigned Opcode = Inst.getOpcode();
4058 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
4059
4060 if (AsVOPD3) {
4061 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4062 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
4063 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4064 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
4065 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
4066 }
4067 }
4068
4069 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4070 if (!InvalidCompOprIdx.has_value())
4071 return true;
4072
4073 auto CompOprIdx = *InvalidCompOprIdx;
4074 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4075 auto ParsedIdx =
4076 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
4077 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4078 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4079
4080 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4081 if (CompOprIdx == VOPD::Component::DST) {
4082 if (AsVOPD3)
4083 Error(Loc, "dst registers must be distinct");
4084 else
4085 Error(Loc, "one dst register must be even and the other odd");
4086 } else {
4087 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4088 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4089 " operands must use different VGPR banks");
4090 }
4091
4092 return false;
4093}
4094
4095// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4096// potentially used as VOPD3 with the same operands.
4097bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4098 // First check if it fits VOPD
4099 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4100 if (!InvalidCompOprIdx.has_value())
4101 return false;
4102
4103 // Then if it fits VOPD3
4104 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4105 if (InvalidCompOprIdx.has_value()) {
4106 // If failed operand is dst it is better to show error about VOPD3
4107 // instruction as it has more capabilities and error message will be
4108 // more informative. If the dst is not legal for VOPD3, then it is not
4109 // legal for VOPD either.
4110 if (*InvalidCompOprIdx == VOPD::Component::DST)
4111 return true;
4112
4113 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4114 // with a conflict in tied implicit src2 of fmac and no asm operand to
4115 // point to.
4116 return false;
4117 }
4118 return true;
4119}
4120
4121 // \returns true if a VOPD3 instruction can also be represented as a shorter
4122// VOPD encoding.
4123bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4124 const unsigned Opcode = Inst.getOpcode();
4125 const auto &II = getVOPDInstInfo(Opcode, &MII);
4126 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4127 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4128 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4129 return false;
4130
4131 // This is an awkward exception: the VOPD3 variant of V_DUAL_CNDMASK_B32 has
4132 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4133 // be parsed as VOPD which does not accept src2.
4134 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4135 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4136 return false;
4137
4138 // If any modifiers are set this cannot be VOPD.
4139 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4140 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4141 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4142 int I = getNamedOperandIdx(Opcode, OpName);
4143 if (I == -1)
4144 continue;
4145 if (Inst.getOperand(I).getImm())
4146 return false;
4147 }
4148
4149 return !tryVOPD3(Inst);
4150}
4151
4152// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4153// form but switch to VOPD3 otherwise.
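// That is, a form parsed as VOPD3 is re-tried as plain VOPD when the shorter
// encoding can also express it (see tryVOPD above), and vice versa.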
4154bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4155 const unsigned Opcode = Inst.getOpcode();
4156 if (!isGFX1250() || !isVOPD(Opcode))
4157 return false;
4158
4159 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4160 return tryVOPD(Inst);
4161 return tryVOPD3(Inst);
4162}
4163
4164bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4165
4166 const unsigned Opc = Inst.getOpcode();
4167 const MCInstrDesc &Desc = MII.get(Opc);
4168
4169 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4170 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4171 assert(ClampIdx != -1);
4172 return Inst.getOperand(ClampIdx).getImm() == 0;
4173 }
4174
4175 return true;
4176}
4177
4180
4181bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
4182 const SMLoc &IDLoc) {
4183
4184 const unsigned Opc = Inst.getOpcode();
4185 const MCInstrDesc &Desc = MII.get(Opc);
4186
4187 if ((Desc.TSFlags & MIMGFlags) == 0)
4188 return true;
4189
4190 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4191 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4192 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4193
4194 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4195 return true;
4196
4197 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4198 return true;
4199
4200 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
4201 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4202 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4203 if (DMask == 0)
4204 DMask = 1;
4205
4206 bool IsPackedD16 = false;
4207 unsigned DataSize =
4208 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4209 if (hasPackedD16()) {
4210 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4211 IsPackedD16 = D16Idx >= 0;
4212 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4213 DataSize = (DataSize + 1) / 2;
4214 }
4215
4216 if ((VDataSize / 4) == DataSize + TFESize)
4217 return true;
4218
4219 StringRef Modifiers;
4220 if (isGFX90A())
4221 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4222 else
4223 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4224
4225 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4226 return false;
4227}
4228
4229bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
4230 const SMLoc &IDLoc) {
4231 const unsigned Opc = Inst.getOpcode();
4232 const MCInstrDesc &Desc = MII.get(Opc);
4233
4234 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4235 return true;
4236
4237 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4238
4239 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4240 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4241 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4242 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4243 ? AMDGPU::OpName::srsrc
4244 : AMDGPU::OpName::rsrc;
4245 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4246 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4247 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4248
4249 assert(VAddr0Idx != -1);
4250 assert(SrsrcIdx != -1);
4251 assert(SrsrcIdx > VAddr0Idx);
4252
4253 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4254 if (BaseOpcode->BVH) {
4255 if (IsA16 == BaseOpcode->A16)
4256 return true;
4257 Error(IDLoc, "image address size does not match a16");
4258 return false;
4259 }
4260
4261 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4262 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4263 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4264 unsigned ActualAddrSize =
4265 IsNSA ? SrsrcIdx - VAddr0Idx
4266 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
4267
4268 unsigned ExpectedAddrSize =
4269 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4270
4271 if (IsNSA) {
4272 if (hasPartialNSAEncoding() &&
4273 ExpectedAddrSize >
4275 int VAddrLastIdx = SrsrcIdx - 1;
4276 unsigned VAddrLastSize =
4277 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
4278
4279 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4280 }
4281 } else {
4282 if (ExpectedAddrSize > 12)
4283 ExpectedAddrSize = 16;
4284
4285 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4286 // This provides backward compatibility for assembly created
4287 // before 160b/192b/224b types were directly supported.
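// e.g. an opcode that needs only 6 address VGPRs may still be written with an
// 8-register tuple such as v[0:7] and is accepted here.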
4288 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4289 return true;
4290 }
4291
4292 if (ActualAddrSize == ExpectedAddrSize)
4293 return true;
4294
4295 Error(IDLoc, "image address size does not match dim and a16");
4296 return false;
4297}
4298
4299bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4300
4301 const unsigned Opc = Inst.getOpcode();
4302 const MCInstrDesc &Desc = MII.get(Opc);
4303
4304 if ((Desc.TSFlags & MIMGFlags) == 0)
4305 return true;
4306 if (!Desc.mayLoad() || !Desc.mayStore())
4307 return true; // Not atomic
4308
4309 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4310 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4311
4312 // This is an incomplete check because image_atomic_cmpswap
4313 // may only use 0x3 and 0xf while other atomic operations
4314 // may use 0x1 and 0x3. However these limitations are
4315 // verified when we check that dmask matches dst size.
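// e.g. 32-bit image atomics use dmask:0x1, 64-bit ones use dmask:0x3, and
// image_atomic_cmpswap uses 0x3 or 0xf for its data pair.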
4316 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4317}
4318
4319bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4320
4321 const unsigned Opc = Inst.getOpcode();
4322 const MCInstrDesc &Desc = MII.get(Opc);
4323
4324 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4325 return true;
4326
4327 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4328 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4329
4330 // GATHER4 instructions use dmask in a different fashion compared to
4331 // other MIMG instructions. The only useful DMASK values are
4332 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4333 // (red,red,red,red) etc.) The ISA document doesn't mention
4334 // this.
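// e.g. dmask:0x2 gathers the green component from the four sampled texels;
// combined masks such as 0x3 are rejected below.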
4335 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4336}
4337
4338bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4339 const OperandVector &Operands) {
4340 if (!isGFX10Plus())
4341 return true;
4342
4343 const unsigned Opc = Inst.getOpcode();
4344 const MCInstrDesc &Desc = MII.get(Opc);
4345
4346 if ((Desc.TSFlags & MIMGFlags) == 0)
4347 return true;
4348
4349 // image_bvh_intersect_ray instructions do not have dim
4351 return true;
4352
4353 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4354 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4355 if (Op.isDim())
4356 return true;
4357 }
4358 return false;
4359}
4360
4361bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4362 const unsigned Opc = Inst.getOpcode();
4363 const MCInstrDesc &Desc = MII.get(Opc);
4364
4365 if ((Desc.TSFlags & MIMGFlags) == 0)
4366 return true;
4367
4368 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4369 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4370 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4371
4372 if (!BaseOpcode->MSAA)
4373 return true;
4374
4375 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4376 assert(DimIdx != -1);
4377
4378 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4379 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4380
4381 return DimInfo->MSAA;
4382}
4383
4384static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4385{
4386 switch (Opcode) {
4387 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4388 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4389 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4390 return true;
4391 default:
4392 return false;
4393 }
4394}
4395
4396 // movrels* opcodes should only allow VGPRs as src0.
4397// This is specified in .td description for vop1/vop3,
4398// but sdwa is handled differently. See isSDWAOperand.
4399bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4400 const OperandVector &Operands) {
4401
4402 const unsigned Opc = Inst.getOpcode();
4403 const MCInstrDesc &Desc = MII.get(Opc);
4404
4405 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4406 return true;
4407
4408 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4409 assert(Src0Idx != -1);
4410
4411 SMLoc ErrLoc;
4412 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4413 if (Src0.isReg()) {
4414 auto Reg = mc2PseudoReg(Src0.getReg());
4415 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4416 if (!isSGPR(Reg, TRI))
4417 return true;
4418 ErrLoc = getRegLoc(Reg, Operands);
4419 } else {
4420 ErrLoc = getConstLoc(Operands);
4421 }
4422
4423 Error(ErrLoc, "source operand must be a VGPR");
4424 return false;
4425}
4426
4427bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4428 const OperandVector &Operands) {
4429
4430 const unsigned Opc = Inst.getOpcode();
4431
4432 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4433 return true;
4434
4435 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4436 assert(Src0Idx != -1);
4437
4438 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4439 if (!Src0.isReg())
4440 return true;
4441
4442 auto Reg = mc2PseudoReg(Src0.getReg());
4443 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4444 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4445 Error(getRegLoc(Reg, Operands),
4446 "source operand must be either a VGPR or an inline constant");
4447 return false;
4448 }
4449
4450 return true;
4451}
4452
4453bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4454 const OperandVector &Operands) {
4455 unsigned Opcode = Inst.getOpcode();
4456 const MCInstrDesc &Desc = MII.get(Opcode);
4457
4458 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4459 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4460 return true;
4461
4462 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4463 if (Src2Idx == -1)
4464 return true;
4465
4466 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4467 Error(getConstLoc(Operands),
4468 "inline constants are not allowed for this operand");
4469 return false;
4470 }
4471
4472 return true;
4473}
4474
4475bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4476 const OperandVector &Operands) {
4477 const unsigned Opc = Inst.getOpcode();
4478 const MCInstrDesc &Desc = MII.get(Opc);
4479
4480 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4481 return true;
4482
4483 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4484 if (BlgpIdx != -1) {
4485 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4486 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4487
4488 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4489 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4490
4491 // Validate the correct register size was used for the floating point
4492 // format operands
4493
4494 bool Success = true;
4495 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4496 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4497 Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()),
4498 Operands),
4499 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4500 Success = false;
4501 }
4502
4503 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4504 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4505 Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src1Idx).getReg()),
4506 Operands),
4507 "wrong register tuple size for blgp value " + Twine(BLGP));
4508 Success = false;
4509 }
4510
4511 return Success;
4512 }
4513 }
4514
4515 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4516 if (Src2Idx == -1)
4517 return true;
4518
4519 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4520 if (!Src2.isReg())
4521 return true;
4522
4523 MCRegister Src2Reg = Src2.getReg();
4524 MCRegister DstReg = Inst.getOperand(0).getReg();
4525 if (Src2Reg == DstReg)
4526 return true;
4527
4528 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4529 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4530 return true;
4531
4532 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4533 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4534 "source 2 operand must not partially overlap with dst");
4535 return false;
4536 }
4537
4538 return true;
4539}
4540
4541bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4542 switch (Inst.getOpcode()) {
4543 default:
4544 return true;
4545 case V_DIV_SCALE_F32_gfx6_gfx7:
4546 case V_DIV_SCALE_F32_vi:
4547 case V_DIV_SCALE_F32_gfx10:
4548 case V_DIV_SCALE_F64_gfx6_gfx7:
4549 case V_DIV_SCALE_F64_vi:
4550 case V_DIV_SCALE_F64_gfx10:
4551 break;
4552 }
4553
4554 // TODO: Check that src0 = src1 or src2.
4555
4556 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4557 AMDGPU::OpName::src2_modifiers,
4558 AMDGPU::OpName::src2_modifiers}) {
4559 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4560 .getImm() &
4562 return false;
4563 }
4564 }
4565
4566 return true;
4567}
4568
4569bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4570
4571 const unsigned Opc = Inst.getOpcode();
4572 const MCInstrDesc &Desc = MII.get(Opc);
4573
4574 if ((Desc.TSFlags & MIMGFlags) == 0)
4575 return true;
4576
4577 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4578 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4579 if (isCI() || isSI())
4580 return false;
4581 }
4582
4583 return true;
4584}
4585
4586bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4587 const unsigned Opc = Inst.getOpcode();
4588 const MCInstrDesc &Desc = MII.get(Opc);
4589
4590 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4591 return true;
4592
4593 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4594
4595 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4596}
4597
4598static bool IsRevOpcode(const unsigned Opcode)
4599{
4600 switch (Opcode) {
4601 case AMDGPU::V_SUBREV_F32_e32:
4602 case AMDGPU::V_SUBREV_F32_e64:
4603 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4604 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4605 case AMDGPU::V_SUBREV_F32_e32_vi:
4606 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4607 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4608 case AMDGPU::V_SUBREV_F32_e64_vi:
4609
4610 case AMDGPU::V_SUBREV_CO_U32_e32:
4611 case AMDGPU::V_SUBREV_CO_U32_e64:
4612 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4613 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4614
4615 case AMDGPU::V_SUBBREV_U32_e32:
4616 case AMDGPU::V_SUBBREV_U32_e64:
4617 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4618 case AMDGPU::V_SUBBREV_U32_e32_vi:
4619 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4620 case AMDGPU::V_SUBBREV_U32_e64_vi:
4621
4622 case AMDGPU::V_SUBREV_U32_e32:
4623 case AMDGPU::V_SUBREV_U32_e64:
4624 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4625 case AMDGPU::V_SUBREV_U32_e32_vi:
4626 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4627 case AMDGPU::V_SUBREV_U32_e64_vi:
4628
4629 case AMDGPU::V_SUBREV_F16_e32:
4630 case AMDGPU::V_SUBREV_F16_e64:
4631 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4632 case AMDGPU::V_SUBREV_F16_e32_vi:
4633 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4634 case AMDGPU::V_SUBREV_F16_e64_vi:
4635
4636 case AMDGPU::V_SUBREV_U16_e32:
4637 case AMDGPU::V_SUBREV_U16_e64:
4638 case AMDGPU::V_SUBREV_U16_e32_vi:
4639 case AMDGPU::V_SUBREV_U16_e64_vi:
4640
4641 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4642 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4643 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4644
4645 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4646 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4647
4648 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4649 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4650
4651 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4652 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4653
4654 case AMDGPU::V_LSHRREV_B32_e32:
4655 case AMDGPU::V_LSHRREV_B32_e64:
4656 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4657 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4658 case AMDGPU::V_LSHRREV_B32_e32_vi:
4659 case AMDGPU::V_LSHRREV_B32_e64_vi:
4660 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4661 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4662
4663 case AMDGPU::V_ASHRREV_I32_e32:
4664 case AMDGPU::V_ASHRREV_I32_e64:
4665 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4666 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4667 case AMDGPU::V_ASHRREV_I32_e32_vi:
4668 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4669 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4670 case AMDGPU::V_ASHRREV_I32_e64_vi:
4671
4672 case AMDGPU::V_LSHLREV_B32_e32:
4673 case AMDGPU::V_LSHLREV_B32_e64:
4674 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4675 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4676 case AMDGPU::V_LSHLREV_B32_e32_vi:
4677 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4678 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4679 case AMDGPU::V_LSHLREV_B32_e64_vi:
4680
4681 case AMDGPU::V_LSHLREV_B16_e32:
4682 case AMDGPU::V_LSHLREV_B16_e64:
4683 case AMDGPU::V_LSHLREV_B16_e32_vi:
4684 case AMDGPU::V_LSHLREV_B16_e64_vi:
4685 case AMDGPU::V_LSHLREV_B16_gfx10:
4686
4687 case AMDGPU::V_LSHRREV_B16_e32:
4688 case AMDGPU::V_LSHRREV_B16_e64:
4689 case AMDGPU::V_LSHRREV_B16_e32_vi:
4690 case AMDGPU::V_LSHRREV_B16_e64_vi:
4691 case AMDGPU::V_LSHRREV_B16_gfx10:
4692
4693 case AMDGPU::V_ASHRREV_I16_e32:
4694 case AMDGPU::V_ASHRREV_I16_e64:
4695 case AMDGPU::V_ASHRREV_I16_e32_vi:
4696 case AMDGPU::V_ASHRREV_I16_e64_vi:
4697 case AMDGPU::V_ASHRREV_I16_gfx10:
4698
4699 case AMDGPU::V_LSHLREV_B64_e64:
4700 case AMDGPU::V_LSHLREV_B64_gfx10:
4701 case AMDGPU::V_LSHLREV_B64_vi:
4702
4703 case AMDGPU::V_LSHRREV_B64_e64:
4704 case AMDGPU::V_LSHRREV_B64_gfx10:
4705 case AMDGPU::V_LSHRREV_B64_vi:
4706
4707 case AMDGPU::V_ASHRREV_I64_e64:
4708 case AMDGPU::V_ASHRREV_I64_gfx10:
4709 case AMDGPU::V_ASHRREV_I64_vi:
4710
4711 case AMDGPU::V_PK_LSHLREV_B16:
4712 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4713 case AMDGPU::V_PK_LSHLREV_B16_vi:
4714
4715 case AMDGPU::V_PK_LSHRREV_B16:
4716 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4717 case AMDGPU::V_PK_LSHRREV_B16_vi:
4718 case AMDGPU::V_PK_ASHRREV_I16:
4719 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4720 case AMDGPU::V_PK_ASHRREV_I16_vi:
4721 return true;
4722 default:
4723 return false;
4724 }
4725}
4726
4727std::optional<StringRef>
4728AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4729
4730 using namespace SIInstrFlags;
4731 const unsigned Opcode = Inst.getOpcode();
4732 const MCInstrDesc &Desc = MII.get(Opcode);
4733
4734 // lds_direct register is defined so that it can be used
4735 // with 9-bit operands only. Ignore encodings which do not accept these.
4736 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4737 if ((Desc.TSFlags & Enc) == 0)
4738 return std::nullopt;
4739
4740 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4741 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4742 if (SrcIdx == -1)
4743 break;
4744 const auto &Src = Inst.getOperand(SrcIdx);
4745 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4746
4747 if (isGFX90A() || isGFX11Plus())
4748 return StringRef("lds_direct is not supported on this GPU");
4749
4750 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4751 return StringRef("lds_direct cannot be used with this instruction");
4752
4753 if (SrcName != OpName::src0)
4754 return StringRef("lds_direct may be used as src0 only");
4755 }
4756 }
4757
4758 return std::nullopt;
4759}
4760
4761SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4762 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4763 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4764 if (Op.isFlatOffset())
4765 return Op.getStartLoc();
4766 }
4767 return getLoc();
4768}
4769
4770bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4771 const OperandVector &Operands) {
4772 auto Opcode = Inst.getOpcode();
4773 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4774 if (OpNum == -1)
4775 return true;
4776
4777 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4778 if ((TSFlags & SIInstrFlags::FLAT))
4779 return validateFlatOffset(Inst, Operands);
4780
4781 if ((TSFlags & SIInstrFlags::SMRD))
4782 return validateSMEMOffset(Inst, Operands);
4783
4784 const auto &Op = Inst.getOperand(OpNum);
4785 // GFX12+ buffer ops: InstOffset is a signed 24-bit field, but must not be negative.
4786 if (isGFX12Plus() &&
4787 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4788 const unsigned OffsetSize = 24;
4789 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4790 Error(getFlatOffsetLoc(Operands),
4791 Twine("expected a ") + Twine(OffsetSize - 1) +
4792 "-bit unsigned offset for buffer ops");
4793 return false;
4794 }
4795 } else {
4796 const unsigned OffsetSize = 16;
4797 if (!isUIntN(OffsetSize, Op.getImm())) {
4798 Error(getFlatOffsetLoc(Operands),
4799 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4800 return false;
4801 }
4802 }
4803 return true;
4804}
4805
4806bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4807 const OperandVector &Operands) {
4808 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4809 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4810 return true;
4811
4812 auto Opcode = Inst.getOpcode();
4813 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4814 assert(OpNum != -1);
4815
4816 const auto &Op = Inst.getOperand(OpNum);
4817 if (!hasFlatOffsets() && Op.getImm() != 0) {
4818 Error(getFlatOffsetLoc(Operands),
4819 "flat offset modifier is not supported on this GPU");
4820 return false;
4821 }
4822
4823 // For pre-GFX12 FLAT instructions the offset must be positive;
4824 // MSB is ignored and forced to zero.
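// e.g. "flat_load_dword v0, v[2:3] offset:-8" is rejected below when negative
// offsets are not allowed for the opcode/target.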
4825 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4826 bool AllowNegative =
4828 isGFX12Plus();
4829 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4830 Error(getFlatOffsetLoc(Operands),
4831 Twine("expected a ") +
4832 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4833 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4834 return false;
4835 }
4836
4837 return true;
4838}
4839
4840SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4841 // Start with second operand because SMEM Offset cannot be dst or src0.
4842 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4843 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4844 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4845 return Op.getStartLoc();
4846 }
4847 return getLoc();
4848}
4849
4850bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4851 const OperandVector &Operands) {
4852 if (isCI() || isSI())
4853 return true;
4854
4855 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4856 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4857 return true;
4858
4859 auto Opcode = Inst.getOpcode();
4860 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4861 if (OpNum == -1)
4862 return true;
4863
4864 const auto &Op = Inst.getOperand(OpNum);
4865 if (!Op.isImm())
4866 return true;
4867
4868 uint64_t Offset = Op.getImm();
4869 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4872 return true;
4873
4874 Error(getSMEMOffsetLoc(Operands),
4875 isGFX12Plus() && IsBuffer
4876 ? "expected a 23-bit unsigned offset for buffer ops"
4877 : isGFX12Plus() ? "expected a 24-bit signed offset"
4878 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4879 : "expected a 21-bit signed offset");
4880
4881 return false;
4882}
4883
4884bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4885 unsigned Opcode = Inst.getOpcode();
4886 const MCInstrDesc &Desc = MII.get(Opcode);
4887 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4888 return true;
4889
4890 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4891 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4892
4893 const int OpIndices[] = { Src0Idx, Src1Idx };
4894
4895 unsigned NumExprs = 0;
4896 unsigned NumLiterals = 0;
4897 uint64_t LiteralValue;
4898
4899 for (int OpIdx : OpIndices) {
4900 if (OpIdx == -1) break;
4901
4902 const MCOperand &MO = Inst.getOperand(OpIdx);
4903 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4904 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4905 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4906 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4907 if (NumLiterals == 0 || LiteralValue != Value) {
4908 LiteralValue = Value;
4909 ++NumLiterals;
4910 }
4911 } else if (MO.isExpr()) {
4912 ++NumExprs;
4913 }
4914 }
4915 }
4916
4917 return NumLiterals + NumExprs <= 1;
4918}
4919
4920bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4921 const unsigned Opc = Inst.getOpcode();
4922 if (isPermlane16(Opc)) {
4923 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4924 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4925
4926 if (OpSel & ~3)
4927 return false;
4928 }
4929
4930 uint64_t TSFlags = MII.get(Opc).TSFlags;
4931
4932 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4933 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4934 if (OpSelIdx != -1) {
4935 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4936 return false;
4937 }
4938 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4939 if (OpSelHiIdx != -1) {
4940 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4941 return false;
4942 }
4943 }
4944
4945 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4946 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4947 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4948 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4949 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4950 if (OpSel & 3)
4951 return false;
4952 }
4953
4954 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4955 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4956 // the first SGPR and use it for both the low and high operations.
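// e.g. on gfx12, if src0 is an SGPR pair, op_sel/op_sel_hi bits selecting its
// high half are rejected here because the HW broadcasts the first SGPR to both
// halves.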
4957 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4958 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4959 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4960 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4961 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4962
4963 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4964 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4965 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4966 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4967
4968 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4969
4970 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
4971 unsigned Mask = 1U << Index;
4972 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
4973 };
4974
4975 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
4976 !VerifyOneSGPR(/*Index=*/0))
4977 return false;
4978 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
4979 !VerifyOneSGPR(/*Index=*/1))
4980 return false;
4981
4982 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4983 if (Src2Idx != -1) {
4984 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4985 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
4986 !VerifyOneSGPR(/*Index=*/2))
4987 return false;
4988 }
4989 }
4990
4991 return true;
4992}
4993
4994bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4995 if (!hasTrue16Insts())
4996 return true;
4997 const MCRegisterInfo *MRI = getMRI();
4998 const unsigned Opc = Inst.getOpcode();
4999 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5000 if (OpSelIdx == -1)
5001 return true;
5002 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
5003 // If the value is 0 we could have a default OpSel Operand, so conservatively
5004 // allow it.
5005 if (OpSelOpValue == 0)
5006 return true;
5007 unsigned OpCount = 0;
5008 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
5009 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
5010 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
5011 if (OpIdx == -1)
5012 continue;
5013 const MCOperand &Op = Inst.getOperand(OpIdx);
5014 if (Op.isReg() &&
5015 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
5016 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
5017 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
5018 if (OpSelOpIsHi != VGPRSuffixIsHi)
5019 return false;
5020 }
5021 ++OpCount;
5022 }
5023
5024 return true;
5025}
5026
5027bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
5028 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5029
5030 const unsigned Opc = Inst.getOpcode();
5031 uint64_t TSFlags = MII.get(Opc).TSFlags;
5032
5033 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
5034 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
5035 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
5036 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
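// e.g. for the v_dot4 fp8/bf8 variants, a neg_lo bit set for src0 or src1 is
// rejected here, while a bit set for src2 is accepted.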
5037 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
5038 !(TSFlags & SIInstrFlags::IsSWMMAC))
5039 return true;
5040
5041 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
5042 if (NegIdx == -1)
5043 return true;
5044
5045 unsigned Neg = Inst.getOperand(NegIdx).getImm();
5046
5047 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
5048 // allowed only on a subset of their src operands.
5049 // Conveniently, such instructions have no src_modifiers operand for the src
5050 // operands that do not allow neg, because those operands do not allow opsel either.
5051
5052 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5053 AMDGPU::OpName::src1_modifiers,
5054 AMDGPU::OpName::src2_modifiers};
5055
5056 for (unsigned i = 0; i < 3; ++i) {
5057 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
5058 if (Neg & (1 << i))
5059 return false;
5060 }
5061 }
5062
5063 return true;
5064}
5065
5066bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5067 const OperandVector &Operands) {
5068 const unsigned Opc = Inst.getOpcode();
5069 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5070 if (DppCtrlIdx >= 0) {
5071 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5072
5073 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5074 AMDGPU::isDPALU_DPP(MII.get(Opc), getSTI())) {
5075 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5076 // only on GFX12.
5077 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5078 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5079 : "DP ALU dpp only supports row_newbcast");
5080 return false;
5081 }
5082 }
5083
5084 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5085 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5086
5087 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5088 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5089 if (Src1Idx >= 0) {
5090 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5091 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5092 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5093 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
5094 SMLoc S = getRegLoc(Reg, Operands);
5095 Error(S, "invalid operand for instruction");
5096 return false;
5097 }
5098 if (Src1.isImm()) {
5099 Error(getInstLoc(Operands),
5100 "src1 immediate operand invalid for instruction");
5101 return false;
5102 }
5103 }
5104 }
5105
5106 return true;
5107}
5108
5109// Check if VCC register matches wavefront size
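// e.g. in wave32 mode only vcc_lo is accepted here; the full vcc pair is valid
// only in wave64 mode.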
5110bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5111 auto FB = getFeatureBits();
5112 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
5113 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
5114}
5115
5116// One unique literal can be used. VOP3 literal is only allowed in GFX10+
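// e.g. reusing the same 32-bit literal value in two source operands is fine,
// but two different literal values in one instruction trigger the error below.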
5117bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5118 const OperandVector &Operands) {
5119 unsigned Opcode = Inst.getOpcode();
5120 const MCInstrDesc &Desc = MII.get(Opcode);
5121 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5122 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5123 !HasMandatoryLiteral && !isVOPD(Opcode))
5124 return true;
5125
5126 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5127
5128 unsigned NumExprs = 0;
5129 unsigned NumLiterals = 0;
5130 uint64_t LiteralValue;
5131
5132 for (int OpIdx : OpIndices) {
5133 if (OpIdx == -1)
5134 continue;
5135
5136 const MCOperand &MO = Inst.getOperand(OpIdx);
5137 if (!MO.isImm() && !MO.isExpr())
5138 continue;
5139 if (!isSISrcOperand(Desc, OpIdx))
5140 continue;
5141
5142 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
5143 uint64_t Value = static_cast<uint64_t>(MO.getImm());
5144 bool IsForcedFP64 =
5145 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5147 HasMandatoryLiteral);
5148 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5149 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5150 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
5151
5152 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5153 !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
5154 Error(getLitLoc(Operands), "invalid operand for instruction");
5155 return false;
5156 }
5157
5158 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5159 Value = Hi_32(Value);
5160
5161 if (NumLiterals == 0 || LiteralValue != Value) {
5162 LiteralValue = Value;
5163 ++NumLiterals;
5164 }
5165 } else if (MO.isExpr()) {
5166 ++NumExprs;
5167 }
5168 }
5169 NumLiterals += NumExprs;
5170
5171 if (!NumLiterals)
5172 return true;
5173
5174 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
5175 Error(getLitLoc(Operands), "literal operands are not supported");
5176 return false;
5177 }
5178
5179 if (NumLiterals > 1) {
5180 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
5181 return false;
5182 }
5183
5184 return true;
5185}
5186
5187// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5188static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5189 const MCRegisterInfo *MRI) {
5190 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5191 if (OpIdx < 0)
5192 return -1;
5193
5194 const MCOperand &Op = Inst.getOperand(OpIdx);
5195 if (!Op.isReg())
5196 return -1;
5197
5198 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5199 auto Reg = Sub ? Sub : Op.getReg();
5200 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5201 return AGPR32.contains(Reg) ? 1 : 0;
5202}
5203
5204bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5205 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5206 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5208 SIInstrFlags::DS)) == 0)
5209 return true;
5210
5211 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5212 ? AMDGPU::OpName::data0
5213 : AMDGPU::OpName::vdata;
5214
5215 const MCRegisterInfo *MRI = getMRI();
5216 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5217 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5218
5219 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5220 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5221 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5222 return false;
5223 }
5224
5225 auto FB = getFeatureBits();
5226 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5227 if (DataAreg < 0 || DstAreg < 0)
5228 return true;
5229 return DstAreg == DataAreg;
5230 }
5231
5232 return DstAreg < 1 && DataAreg < 1;
5233}
5234
5235bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5236 auto FB = getFeatureBits();
5237 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5238 return true;
5239
5240 unsigned Opc = Inst.getOpcode();
5241 const MCRegisterInfo *MRI = getMRI();
5242 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950 that allows
5243 // unaligned VGPR. All others only allow even aligned VGPRs.
5244 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5245 return true;
5246
5247 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5248 switch (Opc) {
5249 default:
5250 break;
5251 case AMDGPU::DS_LOAD_TR6_B96:
5252 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5253 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250 that
5254 // allows unaligned VGPR. All others only allow even aligned VGPRs.
5255 return true;
5256 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5257 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5258 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250 that
5259 // allows unaligned VGPR for vdst, but other operands still only allow
5260 // even aligned VGPRs.
5261 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5262 if (VAddrIdx != -1) {
5263 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5264 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5265 if ((Sub - AMDGPU::VGPR0) & 1)
5266 return false;
5267 }
5268 return true;
5269 }
5270 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5271 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5272 return true;
5273 }
5274 }
5275
5276 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5277 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5278 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5279 const MCOperand &Op = Inst.getOperand(I);
5280 if (!Op.isReg())
5281 continue;
5282
5283 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5284 if (!Sub)
5285 continue;
5286
5287 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5288 return false;
5289 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5290 return false;
5291 }
5292
5293 return true;
5294}
5295
5296SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5297 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5298 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5299 if (Op.isBLGP())
5300 return Op.getStartLoc();
5301 }
5302 return SMLoc();
5303}
5304
5305bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5306 const OperandVector &Operands) {
5307 unsigned Opc = Inst.getOpcode();
5308 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5309 if (BlgpIdx == -1)
5310 return true;
5311 SMLoc BLGPLoc = getBLGPLoc(Operands);
5312 if (!BLGPLoc.isValid())
5313 return true;
5314 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5315 auto FB = getFeatureBits();
5316 bool UsesNeg = false;
5317 if (FB[AMDGPU::FeatureGFX940Insts]) {
5318 switch (Opc) {
5319 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5320 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5321 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5322 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5323 UsesNeg = true;
5324 }
5325 }
5326
5327 if (IsNeg == UsesNeg)
5328 return true;
5329
5330 Error(BLGPLoc,
5331 UsesNeg ? "invalid modifier: blgp is not supported"
5332 : "invalid modifier: neg is not supported");
5333
5334 return false;
5335}
5336
5337bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5338 const OperandVector &Operands) {
5339 if (!isGFX11Plus())
5340 return true;
5341
5342 unsigned Opc = Inst.getOpcode();
5343 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5344 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5345 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5346 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5347 return true;
5348
5349 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5350 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5351 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5352 if (Reg == AMDGPU::SGPR_NULL)
5353 return true;
5354
5355 SMLoc RegLoc = getRegLoc(Reg, Operands);
5356 Error(RegLoc, "src0 must be null");
5357 return false;
5358}
5359
5360bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5361 const OperandVector &Operands) {
5362 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5363 if ((TSFlags & SIInstrFlags::DS) == 0)
5364 return true;
5365 if (TSFlags & SIInstrFlags::GWS)
5366 return validateGWS(Inst, Operands);
5367 // Only validate GDS for non-GWS instructions.
5368 if (hasGDS())
5369 return true;
5370 int GDSIdx =
5371 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5372 if (GDSIdx < 0)
5373 return true;
5374 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5375 if (GDS) {
5376 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5377 Error(S, "gds modifier is not supported on this GPU");
5378 return false;
5379 }
5380 return true;
5381}
5382
5383// gfx90a has an undocumented limitation:
5384// DS_GWS opcodes must use even aligned registers.
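// For example, a data0 operand in v0 or v2 passes this check, while v1 or v3
// is rejected with "vgpr must be even aligned".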
5385bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5386 const OperandVector &Operands) {
5387 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5388 return true;
5389
5390 int Opc = Inst.getOpcode();
5391 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5392 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5393 return true;
5394
5395 const MCRegisterInfo *MRI = getMRI();
5396 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5397 int Data0Pos =
5398 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5399 assert(Data0Pos != -1);
5400 auto Reg = Inst.getOperand(Data0Pos).getReg();
5401 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5402 if (RegIdx & 1) {
5403 SMLoc RegLoc = getRegLoc(Reg, Operands);
5404 Error(RegLoc, "vgpr must be even aligned");
5405 return false;
5406 }
5407
5408 return true;
5409}
5410
5411bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5412 const OperandVector &Operands,
5413 const SMLoc &IDLoc) {
5414 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5415 AMDGPU::OpName::cpol);
5416 if (CPolPos == -1)
5417 return true;
5418
5419 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5420
5421 if (!isGFX1250()) {
5422 if (CPol & CPol::SCAL) {
5423 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5424 StringRef CStr(S.getPointer());
5425 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5426 Error(S, "scale_offset is not supported on this GPU");
5427 }
5428 if (CPol & CPol::NV) {
5429 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5430 StringRef CStr(S.getPointer());
5431 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5432 Error(S, "nv is not supported on this GPU");
5433 }
5434 }
5435
5436 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5437 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5438 StringRef CStr(S.getPointer());
5439 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5440 Error(S, "scale_offset is not supported for this instruction");
5441 }
5442
5443 if (isGFX12Plus())
5444 return validateTHAndScopeBits(Inst, Operands, CPol);
5445
5446 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5447 if (TSFlags & SIInstrFlags::SMRD) {
5448 if (CPol && (isSI() || isCI())) {
5449 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5450 Error(S, "cache policy is not supported for SMRD instructions");
5451 return false;
5452 }
5453 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5454 Error(IDLoc, "invalid cache policy for SMEM instruction");
5455 return false;
5456 }
5457 }
5458
5459 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5460 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5461 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5462 SIInstrFlags::FLAT;
5463 if (!(TSFlags & AllowSCCModifier)) {
5464 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5465 StringRef CStr(S.getPointer());
5466 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5467 Error(S,
5468 "scc modifier is not supported for this instruction on this GPU");
5469 return false;
5470 }
5471 }
5472
5473 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
5474 return true;
5475
5476 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5477 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5478 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5479 : "instruction must use glc");
5480 return false;
5481 }
5482 } else {
5483 if (CPol & CPol::GLC) {
5484 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5485 StringRef CStr(S.getPointer());
5486 S = SMLoc::getFromPointer(
5487 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5488 Error(S, isGFX940() ? "instruction must not use sc0"
5489 : "instruction must not use glc");
5490 return false;
5491 }
5492 }
5493
5494 return true;
5495}
5496
5497bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5498 const OperandVector &Operands,
5499 const unsigned CPol) {
5500 const unsigned TH = CPol & AMDGPU::CPol::TH;
5501 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5502
5503 const unsigned Opcode = Inst.getOpcode();
5504 const MCInstrDesc &TID = MII.get(Opcode);
5505
5506 auto PrintError = [&](StringRef Msg) {
5507 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5508 Error(S, Msg);
5509 return false;
5510 };
5511
5512 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5515 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5516
5517 if (TH == 0)
5518 return true;
5519
5520 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5521 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5522 (TH == AMDGPU::CPol::TH_NT_HT)))
5523 return PrintError("invalid th value for SMEM instruction");
5524
5525 if (TH == AMDGPU::CPol::TH_BYPASS) {
5526 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5527 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5528 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5529 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5530 return PrintError("scope and th combination is not valid");
5531 }
5532
5533 unsigned THType = AMDGPU::getTemporalHintType(TID);
5534 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5535 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5536 return PrintError("invalid th value for atomic instructions");
5537 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5538 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5539 return PrintError("invalid th value for store instructions");
5540 } else {
5541 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5542 return PrintError("invalid th value for load instructions");
5543 }
5544
5545 return true;
5546}
5547
5548bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5549 const OperandVector &Operands) {
5550 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5551 if (Desc.mayStore() &&
5552 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5553 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5554 if (Loc != getInstLoc(Operands)) {
5555 Error(Loc, "TFE modifier has no meaning for store instructions");
5556 return false;
5557 }
5558 }
5559
5560 return true;
5561}
5562
5563bool AMDGPUAsmParser::validateSetVgprMSB(const MCInst &Inst,
5564 const OperandVector &Operands) {
5565 if (Inst.getOpcode() != AMDGPU::S_SET_VGPR_MSB_gfx12)
5566 return true;
5567
5568 int Simm16Pos =
5569 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::simm16);
5570 if ((unsigned)Inst.getOperand(Simm16Pos).getImm() > 255) {
5571 SMLoc Loc = Operands[1]->getStartLoc();
5572 Error(Loc, "s_set_vgpr_msb accepts values in range [0..255]");
5573 return false;
5574 }
5575
5576 return true;
5577}
5578
5579bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5580 const OperandVector &Operands) {
5581 unsigned Opc = Inst.getOpcode();
5582 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5583 const MCInstrDesc &Desc = MII.get(Opc);
5584
5585 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
5586 int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
5587 if (FmtIdx == -1)
5588 return true;
5589 unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
5590 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5591 unsigned RegSize =
5592 TRI->getRegClass(Desc.operands()[SrcIdx].RegClass).getSizeInBits();
5593
5595 return true;
5596
5597 static const char *FmtNames[] = {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
5598 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
5599 "MATRIX_FMT_FP4"};
5600
5601 Error(getRegLoc(mc2PseudoReg(Inst.getOperand(SrcIdx).getReg()), Operands),
5602 "wrong register tuple size for " + Twine(FmtNames[Fmt]));
5603 return false;
5604 };
5605
5606 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5607 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5608}
5609
5610bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5611 const SMLoc &IDLoc,
5612 const OperandVector &Operands) {
5613 if (auto ErrMsg = validateLdsDirect(Inst)) {
5614 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5615 return false;
5616 }
5617 if (!validateTrue16OpSel(Inst)) {
5618 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5619 "op_sel operand conflicts with 16-bit operand suffix");
5620 return false;
5621 }
5622 if (!validateSOPLiteral(Inst)) {
5623 Error(getLitLoc(Operands),
5624 "only one unique literal operand is allowed");
5625 return false;
5626 }
5627 if (!validateVOPLiteral(Inst, Operands)) {
5628 return false;
5629 }
5630 if (!validateConstantBusLimitations(Inst, Operands)) {
5631 return false;
5632 }
5633 if (!validateVOPD(Inst, Operands)) {
5634 return false;
5635 }
5636 if (!validateIntClampSupported(Inst)) {
5637 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5638 "integer clamping is not supported on this GPU");
5639 return false;
5640 }
5641 if (!validateOpSel(Inst)) {
5642 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5643 "invalid op_sel operand");
5644 return false;
5645 }
5646 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5647 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5648 "invalid neg_lo operand");
5649 return false;
5650 }
5651 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5652 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5653 "invalid neg_hi operand");
5654 return false;
5655 }
5656 if (!validateDPP(Inst, Operands)) {
5657 return false;
5658 }
5659 // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
5660 if (!validateMIMGD16(Inst)) {
5661 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5662 "d16 modifier is not supported on this GPU");
5663 return false;
5664 }
5665 if (!validateMIMGDim(Inst, Operands)) {
5666 Error(IDLoc, "missing dim operand");
5667 return false;
5668 }
5669 if (!validateTensorR128(Inst)) {
5670 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5671 "instruction must set modifier r128=0");
5672 return false;
5673 }
5674 if (!validateMIMGMSAA(Inst)) {
5675 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5676 "invalid dim; must be MSAA type");
5677 return false;
5678 }
5679 if (!validateMIMGDataSize(Inst, IDLoc)) {
5680 return false;
5681 }
5682 if (!validateMIMGAddrSize(Inst, IDLoc))
5683 return false;
5684 if (!validateMIMGAtomicDMask(Inst)) {
5685 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5686 "invalid atomic image dmask");
5687 return false;
5688 }
5689 if (!validateMIMGGatherDMask(Inst)) {
5690 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5691 "invalid image_gather dmask: only one bit must be set");
5692 return false;
5693 }
5694 if (!validateMovrels(Inst, Operands)) {
5695 return false;
5696 }
5697 if (!validateOffset(Inst, Operands)) {
5698 return false;
5699 }
5700 if (!validateMAIAccWrite(Inst, Operands)) {
5701 return false;
5702 }
5703 if (!validateMAISrc2(Inst, Operands)) {
5704 return false;
5705 }
5706 if (!validateMFMA(Inst, Operands)) {
5707 return false;
5708 }
5709 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5710 return false;
5711 }
5712
5713 if (!validateAGPRLdSt(Inst)) {
5714 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5715 ? "invalid register class: data and dst should be all VGPR or AGPR"
5716 : "invalid register class: agpr loads and stores not supported on this GPU"
5717 );
5718 return false;
5719 }
5720 if (!validateVGPRAlign(Inst)) {
5721 Error(IDLoc,
5722 "invalid register class: vgpr tuples must be 64 bit aligned");
5723 return false;
5724 }
5725 if (!validateDS(Inst, Operands)) {
5726 return false;
5727 }
5728
5729 if (!validateBLGP(Inst, Operands)) {
5730 return false;
5731 }
5732
5733 if (!validateDivScale(Inst)) {
5734 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5735 return false;
5736 }
5737 if (!validateWaitCnt(Inst, Operands)) {
5738 return false;
5739 }
5740 if (!validateTFE(Inst, Operands)) {
5741 return false;
5742 }
5743 if (!validateSetVgprMSB(Inst, Operands)) {
5744 return false;
5745 }
5746 if (!validateWMMA(Inst, Operands)) {
5747 return false;
5748 }
5749
5750 return true;
5751}
5752
5753 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5754 const FeatureBitset &FBS,
5755 unsigned VariantID = 0);
5756
5757static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5758 const FeatureBitset &AvailableFeatures,
5759 unsigned VariantID);
5760
5761bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5762 const FeatureBitset &FBS) {
5763 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5764}
5765
5766bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5767 const FeatureBitset &FBS,
5768 ArrayRef<unsigned> Variants) {
5769 for (auto Variant : Variants) {
5770 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5771 return true;
5772 }
5773
5774 return false;
5775}
5776
5777bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5778 const SMLoc &IDLoc) {
5779 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5780
5781 // Check if requested instruction variant is supported.
5782 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5783 return false;
5784
5785 // This instruction is not supported.
5786 // Clear any other pending errors because they are no longer relevant.
5787 getParser().clearPendingErrors();
5788
5789 // Requested instruction variant is not supported.
5790 // Check if any other variants are supported.
5791 StringRef VariantName = getMatchedVariantName();
5792 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5793 return Error(IDLoc,
5794 Twine(VariantName,
5795 " variant of this instruction is not supported"));
5796 }
5797
5798 // Check if this instruction may be used with a different wavesize.
5799 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5800 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5801
5802 FeatureBitset FeaturesWS32 = getFeatureBits();
5803 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5804 .flip(AMDGPU::FeatureWavefrontSize32);
5805 FeatureBitset AvailableFeaturesWS32 =
5806 ComputeAvailableFeatures(FeaturesWS32);
5807
5808 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5809 return Error(IDLoc, "instruction requires wavesize=32");
5810 }
5811
5812 // Finally check if this instruction is supported on any other GPU.
5813 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5814 return Error(IDLoc, "instruction not supported on this GPU");
5815 }
5816
5817 // Instruction not supported on any GPU. Probably a typo.
5818 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5819 return Error(IDLoc, "invalid instruction" + Suggestion);
5820}
5821
5822 static bool isInvalidVOPDY(const OperandVector &Operands,
5823 uint64_t InvalidOprIdx) {
5824 assert(InvalidOprIdx < Operands.size());
5825 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5826 if (Op.isToken() && InvalidOprIdx > 1) {
5827 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5828 return PrevOp.isToken() && PrevOp.getToken() == "::";
5829 }
5830 return false;
5831}
5832
5833bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5834 OperandVector &Operands,
5835 MCStreamer &Out,
5836 uint64_t &ErrorInfo,
5837 bool MatchingInlineAsm) {
5838 MCInst Inst;
5839 Inst.setLoc(IDLoc);
5840 unsigned Result = Match_Success;
5841 for (auto Variant : getMatchedVariants()) {
5842 uint64_t EI;
5843 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5844 Variant);
5845 // We order match statuses from least to most specific and use the most
5846 // specific status as the result:
5847 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5848 if (R == Match_Success || R == Match_MissingFeature ||
5849 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5850 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5851 Result != Match_MissingFeature)) {
5852 Result = R;
5853 ErrorInfo = EI;
5854 }
5855 if (R == Match_Success)
5856 break;
5857 }
5858
5859 if (Result == Match_Success) {
5860 if (!validateInstruction(Inst, IDLoc, Operands)) {
5861 return true;
5862 }
5863 Out.emitInstruction(Inst, getSTI());
5864 return false;
5865 }
5866
5867 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5868 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5869 return true;
5870 }
5871
5872 switch (Result) {
5873 default: break;
5874 case Match_MissingFeature:
5875 // It has been verified that the specified instruction
5876 // mnemonic is valid. A match was found but it requires
5877 // features which are not supported on this GPU.
5878 return Error(IDLoc, "operands are not valid for this GPU or mode");
5879
5880 case Match_InvalidOperand: {
5881 SMLoc ErrorLoc = IDLoc;
5882 if (ErrorInfo != ~0ULL) {
5883 if (ErrorInfo >= Operands.size()) {
5884 return Error(IDLoc, "too few operands for instruction");
5885 }
5886 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5887 if (ErrorLoc == SMLoc())
5888 ErrorLoc = IDLoc;
5889
5890 if (isInvalidVOPDY(Operands, ErrorInfo))
5891 return Error(ErrorLoc, "invalid VOPDY instruction");
5892 }
5893 return Error(ErrorLoc, "invalid operand for instruction");
5894 }
5895
5896 case Match_MnemonicFail:
5897 llvm_unreachable("Invalid instructions should have been handled already");
5898 }
5899 llvm_unreachable("Implement any new match types added!");
5900}
5901
5902bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5903 int64_t Tmp = -1;
5904 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5905 return true;
5906 }
5907 if (getParser().parseAbsoluteExpression(Tmp)) {
5908 return true;
5909 }
5910 Ret = static_cast<uint32_t>(Tmp);
5911 return false;
5912}
5913
5914bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5915 if (!getSTI().getTargetTriple().isAMDGCN())
5916 return TokError("directive only supported for amdgcn architecture");
5917
5918 std::string TargetIDDirective;
5919 SMLoc TargetStart = getTok().getLoc();
5920 if (getParser().parseEscapedString(TargetIDDirective))
5921 return true;
5922
5923 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5924 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5925 return getParser().Error(TargetRange.Start,
5926 (Twine(".amdgcn_target directive's target id ") +
5927 Twine(TargetIDDirective) +
5928 Twine(" does not match the specified target id ") +
5929 Twine(getTargetStreamer().getTargetID()->toString())).str());
5930
5931 return false;
5932}
5933
5934bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5935 return Error(Range.Start, "value out of range", Range);
5936}
5937
5938bool AMDGPUAsmParser::calculateGPRBlocks(
5939 const FeatureBitset &Features, const MCExpr *VCCUsed,
5940 const MCExpr *FlatScrUsed, bool XNACKUsed,
5941 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5942 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5943 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5944 // TODO(scott.linder): These calculations are duplicated from
5945 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5946 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5947 MCContext &Ctx = getContext();
5948
5949 const MCExpr *NumSGPRs = NextFreeSGPR;
5950 int64_t EvaluatedSGPRs;
5951
5952 if (Version.Major >= 10)
5953 NumSGPRs = MCConstantExpr::create(0, Ctx);
5954 else {
5955 unsigned MaxAddressableNumSGPRs =
5956 IsaInfo::getAddressableNumSGPRs(&getSTI());
5957
5958 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5959 !Features.test(FeatureSGPRInitBug) &&
5960 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5961 return OutOfRangeError(SGPRRange);
5962
5963 const MCExpr *ExtraSGPRs =
5964 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5965 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5966
5967 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5968 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5969 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5970 return OutOfRangeError(SGPRRange);
5971
5972 if (Features.test(FeatureSGPRInitBug))
5973 NumSGPRs =
5974 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5975 }
5976
5977 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5978 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
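// For example, NumGPR = 43 with an encoding granule of 8 yields
// alignTo(43, 8) / 8 - 1 = 48 / 8 - 1 = 5.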
5979 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5980 unsigned Granule) -> const MCExpr * {
5981 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5982 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5983 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5984 const MCExpr *AlignToGPR =
5985 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5986 const MCExpr *DivGPR =
5987 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5988 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5989 return SubGPR;
5990 };
5991
5992 VGPRBlocks = GetNumGPRBlocks(
5993 NextFreeVGPR,
5994 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5995 SGPRBlocks =
5996 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5997
5998 return false;
5999}
6000
6001bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
6002 if (!getSTI().getTargetTriple().isAMDGCN())
6003 return TokError("directive only supported for amdgcn architecture");
6004
6005 if (!isHsaAbi(getSTI()))
6006 return TokError("directive only supported for amdhsa OS");
6007
6008 StringRef KernelName;
6009 if (getParser().parseIdentifier(KernelName))
6010 return true;
6011
6012 AMDGPU::MCKernelDescriptor KD =
6013 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
6014 &getSTI(), getContext());
6015
6016 StringSet<> Seen;
6017
6018 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
6019
6020 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
6021 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
6022
6023 SMRange VGPRRange;
6024 const MCExpr *NextFreeVGPR = ZeroExpr;
6025 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
6026 const MCExpr *NamedBarCnt = ZeroExpr;
6027 uint64_t SharedVGPRCount = 0;
6028 uint64_t PreloadLength = 0;
6029 uint64_t PreloadOffset = 0;
6030 SMRange SGPRRange;
6031 const MCExpr *NextFreeSGPR = ZeroExpr;
6032
6033 // Count the number of user SGPRs implied from the enabled feature bits.
6034 unsigned ImpliedUserSGPRCount = 0;
6035
6036 // Track if the asm explicitly contains the directive for the user SGPR
6037 // count.
6038 std::optional<unsigned> ExplicitUserSGPRCount;
6039 const MCExpr *ReserveVCC = OneExpr;
6040 const MCExpr *ReserveFlatScr = OneExpr;
6041 std::optional<bool> EnableWavefrontSize32;
6042
6043 while (true) {
6044 while (trySkipToken(AsmToken::EndOfStatement));
6045
6046 StringRef ID;
6047 SMRange IDRange = getTok().getLocRange();
6048 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
6049 return true;
6050
6051 if (ID == ".end_amdhsa_kernel")
6052 break;
6053
6054 if (!Seen.insert(ID).second)
6055 return TokError(".amdhsa_ directives cannot be repeated");
6056
6057 SMLoc ValStart = getLoc();
6058 const MCExpr *ExprVal;
6059 if (getParser().parseExpression(ExprVal))
6060 return true;
6061 SMLoc ValEnd = getLoc();
6062 SMRange ValRange = SMRange(ValStart, ValEnd);
6063
6064 int64_t IVal = 0;
6065 uint64_t Val = IVal;
6066 bool EvaluatableExpr;
6067 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6068 if (IVal < 0)
6069 return OutOfRangeError(ValRange);
6070 Val = IVal;
6071 }
6072
6073#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6074 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6075 return OutOfRangeError(RANGE); \
6076 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6077 getContext());
6078
6079// Some fields use the parsed value immediately, which requires the
6080// expression to be resolvable.
6081#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6082 if (!(RESOLVED)) \
6083 return Error(IDRange.Start, "directive should have resolvable expression", \
6084 IDRange);
6085
6086 if (ID == ".amdhsa_group_segment_fixed_size") {
6087 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
6088 CHAR_BIT>(Val))
6089 return OutOfRangeError(ValRange);
6090 KD.group_segment_fixed_size = ExprVal;
6091 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6092 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
6093 CHAR_BIT>(Val))
6094 return OutOfRangeError(ValRange);
6095 KD.private_segment_fixed_size = ExprVal;
6096 } else if (ID == ".amdhsa_kernarg_size") {
6097 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6098 return OutOfRangeError(ValRange);
6099 KD.kernarg_size = ExprVal;
6100 } else if (ID == ".amdhsa_user_sgpr_count") {
6101 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6102 ExplicitUserSGPRCount = Val;
6103 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6104 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6105 if (hasArchitectedFlatScratch())
6106 return Error(IDRange.Start,
6107 "directive is not supported with architected flat scratch",
6108 IDRange);
6110 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6111 ExprVal, ValRange);
6112 if (Val)
6113 ImpliedUserSGPRCount += 4;
6114 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6115 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6116 if (!hasKernargPreload())
6117 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6118
6119 if (Val > getMaxNumUserSGPRs())
6120 return OutOfRangeError(ValRange);
6121 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6122 ValRange);
6123 if (Val) {
6124 ImpliedUserSGPRCount += Val;
6125 PreloadLength = Val;
6126 }
6127 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6128 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6129 if (!hasKernargPreload())
6130 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6131
6132 if (Val >= 1024)
6133 return OutOfRangeError(ValRange);
6134 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6135 ValRange);
6136 if (Val)
6137 PreloadOffset = Val;
6138 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6139 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6141 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6142 ValRange);
6143 if (Val)
6144 ImpliedUserSGPRCount += 2;
6145 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6146 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6148 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6149 ValRange);
6150 if (Val)
6151 ImpliedUserSGPRCount += 2;
6152 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6153 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6155 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6156 ExprVal, ValRange);
6157 if (Val)
6158 ImpliedUserSGPRCount += 2;
6159 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6160 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6162 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6163 ValRange);
6164 if (Val)
6165 ImpliedUserSGPRCount += 2;
6166 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6167 if (hasArchitectedFlatScratch())
6168 return Error(IDRange.Start,
6169 "directive is not supported with architected flat scratch",
6170 IDRange);
6171 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6173 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6174 ExprVal, ValRange);
6175 if (Val)
6176 ImpliedUserSGPRCount += 2;
6177 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6178 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6180 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6181 ExprVal, ValRange);
6182 if (Val)
6183 ImpliedUserSGPRCount += 1;
6184 } else if (ID == ".amdhsa_wavefront_size32") {
6185 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6186 if (IVersion.Major < 10)
6187 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6188 EnableWavefrontSize32 = Val;
6190 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6191 ValRange);
6192 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6194 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6195 ValRange);
6196 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6197 if (hasArchitectedFlatScratch())
6198 return Error(IDRange.Start,
6199 "directive is not supported with architected flat scratch",
6200 IDRange);
6202 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6203 ValRange);
6204 } else if (ID == ".amdhsa_enable_private_segment") {
6205 if (!hasArchitectedFlatScratch())
6206 return Error(
6207 IDRange.Start,
6208 "directive is not supported without architected flat scratch",
6209 IDRange);
6211 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6212 ValRange);
6213 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6215 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6216 ValRange);
6217 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6219 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6220 ValRange);
6221 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6223 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6224 ValRange);
6225 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6227 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6228 ValRange);
6229 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6231 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6232 ValRange);
6233 } else if (ID == ".amdhsa_next_free_vgpr") {
6234 VGPRRange = ValRange;
6235 NextFreeVGPR = ExprVal;
6236 } else if (ID == ".amdhsa_next_free_sgpr") {
6237 SGPRRange = ValRange;
6238 NextFreeSGPR = ExprVal;
6239 } else if (ID == ".amdhsa_accum_offset") {
6240 if (!isGFX90A())
6241 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6242 AccumOffset = ExprVal;
6243 } else if (ID == ".amdhsa_named_barrier_count") {
6244 if (!isGFX1250())
6245 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6246 NamedBarCnt = ExprVal;
6247 } else if (ID == ".amdhsa_reserve_vcc") {
6248 if (EvaluatableExpr && !isUInt<1>(Val))
6249 return OutOfRangeError(ValRange);
6250 ReserveVCC = ExprVal;
6251 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6252 if (IVersion.Major < 7)
6253 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6254 if (hasArchitectedFlatScratch())
6255 return Error(IDRange.Start,
6256 "directive is not supported with architected flat scratch",
6257 IDRange);
6258 if (EvaluatableExpr && !isUInt<1>(Val))
6259 return OutOfRangeError(ValRange);
6260 ReserveFlatScr = ExprVal;
6261 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6262 if (IVersion.Major < 8)
6263 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6264 if (!isUInt<1>(Val))
6265 return OutOfRangeError(ValRange);
6266 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6267 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6268 IDRange);
6269 } else if (ID == ".amdhsa_float_round_mode_32") {
6271 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6272 ValRange);
6273 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6275 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6276 ValRange);
6277 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6279 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6280 ValRange);
6281 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6283 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6284 ValRange);
6285 } else if (ID == ".amdhsa_dx10_clamp") {
6286 if (IVersion.Major >= 12)
6287 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6289 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6290 ValRange);
6291 } else if (ID == ".amdhsa_ieee_mode") {
6292 if (IVersion.Major >= 12)
6293 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6295 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6296 ValRange);
6297 } else if (ID == ".amdhsa_fp16_overflow") {
6298 if (IVersion.Major < 9)
6299 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6301 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6302 ValRange);
6303 } else if (ID == ".amdhsa_tg_split") {
6304 if (!isGFX90A())
6305 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6306 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6307 ExprVal, ValRange);
6308 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6309 if (!supportsWGP(getSTI()))
6310 return Error(IDRange.Start,
6311 "directive unsupported on " + getSTI().getCPU(), IDRange);
6313 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6314 ValRange);
6315 } else if (ID == ".amdhsa_memory_ordered") {
6316 if (IVersion.Major < 10)
6317 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6319 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6320 ValRange);
6321 } else if (ID == ".amdhsa_forward_progress") {
6322 if (IVersion.Major < 10)
6323 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6325 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6326 ValRange);
6327 } else if (ID == ".amdhsa_shared_vgpr_count") {
6328 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6329 if (IVersion.Major < 10 || IVersion.Major >= 12)
6330 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6331 IDRange);
6332 SharedVGPRCount = Val;
6334 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6335 ValRange);
6336 } else if (ID == ".amdhsa_inst_pref_size") {
6337 if (IVersion.Major < 11)
6338 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6339 if (IVersion.Major == 11) {
6341 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6342 ValRange);
6343 } else {
6345 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6346 ValRange);
6347 }
6348 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6351 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6352 ExprVal, ValRange);
6353 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6355 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6356 ExprVal, ValRange);
6357 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6360 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6361 ExprVal, ValRange);
6362 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6364 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6365 ExprVal, ValRange);
6366 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6368 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6369 ExprVal, ValRange);
6370 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6372 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6373 ExprVal, ValRange);
6374 } else if (ID == ".amdhsa_exception_int_div_zero") {
6376 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6377 ExprVal, ValRange);
6378 } else if (ID == ".amdhsa_round_robin_scheduling") {
6379 if (IVersion.Major < 12)
6380 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6382 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6383 ValRange);
6384 } else {
6385 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6386 }
6387
6388#undef PARSE_BITS_ENTRY
6389 }
6390
6391 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6392 return TokError(".amdhsa_next_free_vgpr directive is required");
6393
6394 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6395 return TokError(".amdhsa_next_free_sgpr directive is required");
6396
6397 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6398
6399 // Consider the case where the total number of UserSGPRs, including
6400 // trailing allocated preload SGPRs, is greater than the number of
6401 // explicitly referenced SGPRs.
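// For example, if 16 user SGPRs are enabled but the kernel only references
// s0..s11, the max below raises NextFreeSGPR from 12 to 16.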
6402 if (PreloadLength) {
6403 MCContext &Ctx = getContext();
6404 NextFreeSGPR = AMDGPUMCExpr::createMax(
6405 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6406 }
6407
6408 const MCExpr *VGPRBlocks;
6409 const MCExpr *SGPRBlocks;
6410 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6411 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6412 EnableWavefrontSize32, NextFreeVGPR,
6413 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6414 SGPRBlocks))
6415 return true;
6416
6417 int64_t EvaluatedVGPRBlocks;
6418 bool VGPRBlocksEvaluatable =
6419 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6420 if (VGPRBlocksEvaluatable &&
6421 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6422 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6423 return OutOfRangeError(VGPRRange);
6424 }
6425 AMDGPU::MCKernelDescriptor::bits_set(
6426 KD.compute_pgm_rsrc1, VGPRBlocks,
6427 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6428 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6429
6430 int64_t EvaluatedSGPRBlocks;
6431 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6432 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6433 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6434 return OutOfRangeError(SGPRRange);
6435 AMDGPU::MCKernelDescriptor::bits_set(
6436 KD.compute_pgm_rsrc1, SGPRBlocks,
6437 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6438 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6439
6440 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6441 return TokError("amdgpu_user_sgpr_count smaller than than implied by "
6442 "enabled user SGPRs");
6443
6444 if (isGFX1250()) {
6446 return TokError("too many user SGPRs enabled");
6449 MCConstantExpr::create(UserSGPRCount, getContext()),
6450 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6451 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6452 } else {
6454 UserSGPRCount))
6455 return TokError("too many user SGPRs enabled");
6458 MCConstantExpr::create(UserSGPRCount, getContext()),
6459 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6460 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6461 }
6462
6463 int64_t IVal = 0;
6464 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6465 return TokError("Kernarg size should be resolvable");
6466 uint64_t kernarg_size = IVal;
6467 if (PreloadLength && kernarg_size &&
6468 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6469 return TokError("Kernarg preload length + offset is larger than the "
6470 "kernarg segment size");
6471
6472 if (isGFX90A()) {
6473 if (!Seen.contains(".amdhsa_accum_offset"))
6474 return TokError(".amdhsa_accum_offset directive is required");
6475 int64_t EvaluatedAccum;
6476 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6477 uint64_t UEvaluatedAccum = EvaluatedAccum;
6478 if (AccumEvaluatable &&
6479 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6480 return TokError("accum_offset should be in range [4..256] in "
6481 "increments of 4");
6482
6483 int64_t EvaluatedNumVGPR;
6484 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6485 AccumEvaluatable &&
6486 UEvaluatedAccum >
6487 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6488 return TokError("accum_offset exceeds total VGPR allocation");
6489 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6491 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6494 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6495 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6496 getContext());
6497 }
6498
6499 if (isGFX1250())
6501 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6502 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6503 getContext());
6504
6505 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6506 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
6507 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6508 return TokError("shared_vgpr_count directive not valid on "
6509 "wavefront size 32");
6510 }
6511
6512 if (VGPRBlocksEvaluatable &&
6513 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6514 63)) {
6515 return TokError("shared_vgpr_count*2 + "
6516 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6517 "exceed 63\n");
6518 }
6519 }
6520
6521 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6522 NextFreeVGPR, NextFreeSGPR,
6523 ReserveVCC, ReserveFlatScr);
6524 return false;
6525}
6526
6527bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6528 uint32_t Version;
6529 if (ParseAsAbsoluteExpression(Version))
6530 return true;
6531
6532 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6533 return false;
6534}
6535
6536bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6537 AMDGPUMCKernelCodeT &C) {
6538 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6539 // assembly for backwards compatibility.
6540 if (ID == "max_scratch_backing_memory_byte_size") {
6541 Parser.eatToEndOfStatement();
6542 return false;
6543 }
6544
6545 SmallString<40> ErrStr;
6546 raw_svector_ostream Err(ErrStr);
6547 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6548 return TokError(Err.str());
6549 }
6550 Lex();
6551
6552 if (ID == "enable_wavefront_size32") {
6553 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6554 if (!isGFX10Plus())
6555 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6556 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6557 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6558 } else {
6559 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6560 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6561 }
6562 }
6563
6564 if (ID == "wavefront_size") {
6565 if (C.wavefront_size == 5) {
6566 if (!isGFX10Plus())
6567 return TokError("wavefront_size=5 is only allowed on GFX10+");
6568 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6569 return TokError("wavefront_size=5 requires +WavefrontSize32");
6570 } else if (C.wavefront_size == 6) {
6571 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6572 return TokError("wavefront_size=6 requires +WavefrontSize64");
6573 }
6574 }
6575
6576 return false;
6577}
6578
6579bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6580 AMDGPUMCKernelCodeT KernelCode;
6581 KernelCode.initDefault(&getSTI(), getContext());
6582
6583 while (true) {
6584 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6585 // will set the current token to EndOfStatement.
6586 while(trySkipToken(AsmToken::EndOfStatement));
6587
6588 StringRef ID;
6589 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6590 return true;
6591
6592 if (ID == ".end_amd_kernel_code_t")
6593 break;
6594
6595 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6596 return true;
6597 }
6598
6599 KernelCode.validate(&getSTI(), getContext());
6600 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6601
6602 return false;
6603}
6604
6605bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6606 StringRef KernelName;
6607 if (!parseId(KernelName, "expected symbol name"))
6608 return true;
6609
6610 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6611 ELF::STT_AMDGPU_HSA_KERNEL);
6612
6613 KernelScope.initialize(getContext());
6614 return false;
6615}
6616
6617bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6618 if (!getSTI().getTargetTriple().isAMDGCN()) {
6619 return Error(getLoc(),
6620 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6621 "architectures");
6622 }
6623
6624 auto TargetIDDirective = getLexer().getTok().getStringContents();
6625 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6626 return Error(getParser().getTok().getLoc(), "target id must match options");
6627
6628 getTargetStreamer().EmitISAVersion();
6629 Lex();
6630
6631 return false;
6632}
6633
6634bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6635 assert(isHsaAbi(getSTI()));
6636
6637 std::string HSAMetadataString;
6638 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6639 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6640 return true;
6641
6642 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6643 return Error(getLoc(), "invalid HSA metadata");
6644
6645 return false;
6646}
6647
6648/// Common code to parse out a block of text (typically YAML) between start and
6649/// end directives.
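/// In this file it is used by ParseDirectiveHSAMetadata and
/// ParseDirectivePALMetadataBegin to collect everything between their
/// begin/end directive pairs.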
6650bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6651 const char *AssemblerDirectiveEnd,
6652 std::string &CollectString) {
6653
6654 raw_string_ostream CollectStream(CollectString);
6655
6656 getLexer().setSkipSpace(false);
6657
6658 bool FoundEnd = false;
6659 while (!isToken(AsmToken::Eof)) {
6660 while (isToken(AsmToken::Space)) {
6661 CollectStream << getTokenStr();
6662 Lex();
6663 }
6664
6665 if (trySkipId(AssemblerDirectiveEnd)) {
6666 FoundEnd = true;
6667 break;
6668 }
6669
6670 CollectStream << Parser.parseStringToEndOfStatement()
6671 << getContext().getAsmInfo()->getSeparatorString();
6672
6673 Parser.eatToEndOfStatement();
6674 }
6675
6676 getLexer().setSkipSpace(true);
6677
6678 if (isToken(AsmToken::Eof) && !FoundEnd) {
6679 return TokError(Twine("expected directive ") +
6680 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6681 }
6682
6683 return false;
6684}
6685
6686/// Parse the assembler directive for new MsgPack-format PAL metadata.
6687bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6688 std::string String;
6689 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6690 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6691 return true;
6692
6693 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6694 if (!PALMetadata->setFromString(String))
6695 return Error(getLoc(), "invalid PAL metadata");
6696 return false;
6697}
6698
6699/// Parse the assembler directive for old linear-format PAL metadata.
6700bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6701 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6702 return Error(getLoc(),
6703 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6704 "not available on non-amdpal OSes")).str());
6705 }
6706
6707 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6708 PALMetadata->setLegacy();
6709 for (;;) {
6710 uint32_t Key, Value;
6711 if (ParseAsAbsoluteExpression(Key)) {
6712 return TokError(Twine("invalid value in ") +
6714 }
6715 if (!trySkipToken(AsmToken::Comma)) {
6716 return TokError(Twine("expected an even number of values in ") +
6718 }
6719 if (ParseAsAbsoluteExpression(Value)) {
6720 return TokError(Twine("invalid value in ") +
6722 }
6723 PALMetadata->setRegister(Key, Value);
6724 if (!trySkipToken(AsmToken::Comma))
6725 break;
6726 }
6727 return false;
6728}
6729
6730/// ParseDirectiveAMDGPULDS
6731/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
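/// For example (symbol name chosen purely for illustration):
///   .amdgpu_lds my_lds_var, 1024, 16
/// reserves 1024 bytes of LDS for my_lds_var with 16-byte alignment.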
6732bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6733 if (getParser().checkForValidSection())
6734 return true;
6735
6736 StringRef Name;
6737 SMLoc NameLoc = getLoc();
6738 if (getParser().parseIdentifier(Name))
6739 return TokError("expected identifier in directive");
6740
6741 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6742 if (getParser().parseComma())
6743 return true;
6744
6745 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6746
6747 int64_t Size;
6748 SMLoc SizeLoc = getLoc();
6749 if (getParser().parseAbsoluteExpression(Size))
6750 return true;
6751 if (Size < 0)
6752 return Error(SizeLoc, "size must be non-negative");
6753 if (Size > LocalMemorySize)
6754 return Error(SizeLoc, "size is too large");
6755
6756 int64_t Alignment = 4;
6757 if (trySkipToken(AsmToken::Comma)) {
6758 SMLoc AlignLoc = getLoc();
6759 if (getParser().parseAbsoluteExpression(Alignment))
6760 return true;
6761 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6762 return Error(AlignLoc, "alignment must be a power of two");
6763
6764 // Alignment larger than the size of LDS is possible in theory, as long
6765 // as the linker manages to place the symbol at address 0, but we do want
6766 // to make sure the alignment fits nicely into a 32-bit integer.
6767 if (Alignment >= 1u << 31)
6768 return Error(AlignLoc, "alignment is too large");
6769 }
6770
6771 if (parseEOL())
6772 return true;
6773
6774 Symbol->redefineIfPossible();
6775 if (!Symbol->isUndefined())
6776 return Error(NameLoc, "invalid symbol redefinition");
6777
6778 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6779 return false;
6780}
6781
6782bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6783 StringRef IDVal = DirectiveID.getString();
6784
6785 if (isHsaAbi(getSTI())) {
6786 if (IDVal == ".amdhsa_kernel")
6787 return ParseDirectiveAMDHSAKernel();
6788
6789 if (IDVal == ".amdhsa_code_object_version")
6790 return ParseDirectiveAMDHSACodeObjectVersion();
6791
6792 // TODO: Restructure/combine with PAL metadata directive.
6793 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6794 return ParseDirectiveHSAMetadata();
6795 } else {
6796 if (IDVal == ".amd_kernel_code_t")
6797 return ParseDirectiveAMDKernelCodeT();
6798
6799 if (IDVal == ".amdgpu_hsa_kernel")
6800 return ParseDirectiveAMDGPUHsaKernel();
6801
6802 if (IDVal == ".amd_amdgpu_isa")
6803 return ParseDirectiveISAVersion();
6804
6805 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6806 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6807 Twine(" directive is "
6808 "not available on non-amdhsa OSes"))
6809 .str());
6810 }
6811 }
6812
6813 if (IDVal == ".amdgcn_target")
6814 return ParseDirectiveAMDGCNTarget();
6815
6816 if (IDVal == ".amdgpu_lds")
6817 return ParseDirectiveAMDGPULDS();
6818
6819 if (IDVal == PALMD::AssemblerDirectiveBegin)
6820 return ParseDirectivePALMetadataBegin();
6821
6822 if (IDVal == PALMD::AssemblerDirective)
6823 return ParseDirectivePALMetadata();
6824
6825 return true;
6826}
6827
6828bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6829 MCRegister Reg) {
6830 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6831 return isGFX9Plus();
6832
6833 // GFX10+ has 2 more SGPRs, 104 and 105.
6834 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6835 return hasSGPR104_SGPR105();
6836
6837 switch (Reg.id()) {
6838 case SRC_SHARED_BASE_LO:
6839 case SRC_SHARED_BASE:
6840 case SRC_SHARED_LIMIT_LO:
6841 case SRC_SHARED_LIMIT:
6842 case SRC_PRIVATE_BASE_LO:
6843 case SRC_PRIVATE_BASE:
6844 case SRC_PRIVATE_LIMIT_LO:
6845 case SRC_PRIVATE_LIMIT:
6846 return isGFX9Plus();
6847 case SRC_FLAT_SCRATCH_BASE_LO:
6848 case SRC_FLAT_SCRATCH_BASE_HI:
6849 return hasGloballyAddressableScratch();
6850 case SRC_POPS_EXITING_WAVE_ID:
6851 return isGFX9Plus() && !isGFX11Plus();
6852 case TBA:
6853 case TBA_LO:
6854 case TBA_HI:
6855 case TMA:
6856 case TMA_LO:
6857 case TMA_HI:
6858 return !isGFX9Plus();
6859 case XNACK_MASK:
6860 case XNACK_MASK_LO:
6861 case XNACK_MASK_HI:
6862 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6863 case SGPR_NULL:
6864 return isGFX10Plus();
6865 case SRC_EXECZ:
6866 case SRC_VCCZ:
6867 return !isGFX11Plus();
6868 default:
6869 break;
6870 }
6871
6872 if (isCI())
6873 return true;
6874
6875 if (isSI() || isGFX10Plus()) {
6876 // No flat_scr on SI.
6877 // On GFX10Plus flat scratch is not a valid register operand and can only be
6878 // accessed with s_setreg/s_getreg.
6879 switch (Reg.id()) {
6880 case FLAT_SCR:
6881 case FLAT_SCR_LO:
6882 case FLAT_SCR_HI:
6883 return false;
6884 default:
6885 return true;
6886 }
6887 }
6888
6889 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6890 // SI/CI have.
6891 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6892 return hasSGPR102_SGPR103();
6893
6894 return true;
6895}
6896
6897ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6898 StringRef Mnemonic,
6899 OperandMode Mode) {
6900 ParseStatus Res = parseVOPD(Operands);
6901 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6902 return Res;
6903
6904 // Try to parse with a custom parser
6905 Res = MatchOperandParserImpl(Operands, Mnemonic);
6906
6907 // If we successfully parsed the operand or if there was an error parsing,
6908 // we are done.
6909 //
6910 // If we are parsing after we reach EndOfStatement then this means we
6911 // are appending default values to the Operands list. This is only done
6912 // by the custom parser, so we shouldn't continue on to the generic parsing.
6913 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6914 return Res;
6915
6916 SMLoc RBraceLoc;
6917 SMLoc LBraceLoc = getLoc();
6918 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6919 unsigned Prefix = Operands.size();
6920
6921 for (;;) {
6922 auto Loc = getLoc();
6923 Res = parseReg(Operands);
6924 if (Res.isNoMatch())
6925 Error(Loc, "expected a register");
6926 if (!Res.isSuccess())
6927 return ParseStatus::Failure;
6928
6929 RBraceLoc = getLoc();
6930 if (trySkipToken(AsmToken::RBrac))
6931 break;
6932
6933 if (!skipToken(AsmToken::Comma,
6934 "expected a comma or a closing square bracket"))
6935 return ParseStatus::Failure;
6936 }
6937
6938 if (Operands.size() - Prefix > 1) {
6939 Operands.insert(Operands.begin() + Prefix,
6940 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6941 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6942 }
6943
6944 return ParseStatus::Success;
6945 }
6946
6947 return parseRegOrImm(Operands);
6948}
6949
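// Strips a trailing encoding suffix (_e64_dpp, _e64, _e32, _dpp, _sdwa) from
// the mnemonic and records the forced encoding it implies.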
6950StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6951 // Clear any forced encodings from the previous instruction.
6952 setForcedEncodingSize(0);
6953 setForcedDPP(false);
6954 setForcedSDWA(false);
6955
6956 if (Name.consume_back("_e64_dpp")) {
6957 setForcedDPP(true);
6958 setForcedEncodingSize(64);
6959 return Name;
6960 }
6961 if (Name.consume_back("_e64")) {
6962 setForcedEncodingSize(64);
6963 return Name;
6964 }
6965 if (Name.consume_back("_e32")) {
6966 setForcedEncodingSize(32);
6967 return Name;
6968 }
6969 if (Name.consume_back("_dpp")) {
6970 setForcedDPP(true);
6971 return Name;
6972 }
6973 if (Name.consume_back("_sdwa")) {
6974 setForcedSDWA(true);
6975 return Name;
6976 }
6977 return Name;
6978}
6979
6980static void applyMnemonicAliases(StringRef &Mnemonic,
6981 const FeatureBitset &Features,
6982 unsigned VariantID);
6983
6984bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6985 StringRef Name, SMLoc NameLoc,
6986 OperandVector &Operands) {
6987 // Add the instruction mnemonic
6988 Name = parseMnemonicSuffix(Name);
6989
6990 // If the target architecture uses MnemonicAlias, call it here to parse
6991 // operands correctly.
6992 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6993
6994 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6995
6996 bool IsMIMG = Name.starts_with("image_");
6997
6998 while (!trySkipToken(AsmToken::EndOfStatement)) {
6999 OperandMode Mode = OperandMode_Default;
7000 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
7001 Mode = OperandMode_NSA;
7002 ParseStatus Res = parseOperand(Operands, Name, Mode);
7003
7004 if (!Res.isSuccess()) {
7005 checkUnsupportedInstruction(Name, NameLoc);
7006 if (!Parser.hasPendingError()) {
7007 // FIXME: use real operand location rather than the current location.
7008 StringRef Msg = Res.isFailure() ? "failed parsing operand."
7009 : "not a valid operand.";
7010 Error(getLoc(), Msg);
7011 }
7012 while (!trySkipToken(AsmToken::EndOfStatement)) {
7013 lex();
7014 }
7015 return true;
7016 }
7017
7018 // Eat the comma or space if there is one.
7019 trySkipToken(AsmToken::Comma);
7020 }
7021
7022 return false;
7023}
7024
7025//===----------------------------------------------------------------------===//
7026// Utility functions
7027//===----------------------------------------------------------------------===//
7028
7029 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
7030 OperandVector &Operands) {
7031 SMLoc S = getLoc();
7032 if (!trySkipId(Name))
7033 return ParseStatus::NoMatch;
7034
7035 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
7036 return ParseStatus::Success;
7037}
7038
7039ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
7040 int64_t &IntVal) {
7041
7042 if (!trySkipId(Prefix, AsmToken::Colon))
7043 return ParseStatus::NoMatch;
7044
7045 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
7046 }
7047
7048ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7049 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7050 std::function<bool(int64_t &)> ConvertResult) {
7051 SMLoc S = getLoc();
7052 int64_t Value = 0;
7053
7054 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
7055 if (!Res.isSuccess())
7056 return Res;
7057
7058 if (ConvertResult && !ConvertResult(Value)) {
7059 Error(S, "invalid " + StringRef(Prefix) + " value.");
7060 }
7061
7062 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
7063 return ParseStatus::Success;
7064}
7065
7066ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7067 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7068 bool (*ConvertResult)(int64_t &)) {
7069 SMLoc S = getLoc();
7070 if (!trySkipId(Prefix, AsmToken::Colon))
7071 return ParseStatus::NoMatch;
7072
7073 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
7074 return ParseStatus::Failure;
7075
7076 unsigned Val = 0;
7077 const unsigned MaxSize = 4;
7078
7079 // FIXME: How to verify the number of elements matches the number of src
7080 // operands?
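// Illustrative example of the accepted syntax: a bracketed list of 0/1 values
// such as neg:[0,1,1] or op_sel:[0,0,1], with at most MaxSize elements.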
7081 for (int I = 0; ; ++I) {
7082 int64_t Op;
7083 SMLoc Loc = getLoc();
7084 if (!parseExpr(Op))
7085 return ParseStatus::Failure;
7086
7087 if (Op != 0 && Op != 1)
7088 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7089
7090 Val |= (Op << I);
7091
7092 if (trySkipToken(AsmToken::RBrac))
7093 break;
7094
7095 if (I + 1 == MaxSize)
7096 return Error(getLoc(), "expected a closing square bracket");
7097
7098 if (!skipToken(AsmToken::Comma, "expected a comma"))
7099 return ParseStatus::Failure;
7100 }
7101
7102 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7103 return ParseStatus::Success;
7104}
7105
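// Parses an optional named-bit modifier, e.g. "tfe" to set the bit or "notfe"
// to clear it (example names; any bit handled here follows the same
// "name"/"no<name>" pattern).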
7106ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7107 OperandVector &Operands,
7108 AMDGPUOperand::ImmTy ImmTy) {
7109 int64_t Bit;
7110 SMLoc S = getLoc();
7111
7112 if (trySkipId(Name)) {
7113 Bit = 1;
7114 } else if (trySkipId("no", Name)) {
7115 Bit = 0;
7116 } else {
7117 return ParseStatus::NoMatch;
7118 }
7119
7120 if (Name == "r128" && !hasMIMG_R128())
7121 return Error(S, "r128 modifier is not supported on this GPU");
7122 if (Name == "a16" && !hasA16())
7123 return Error(S, "a16 modifier is not supported on this GPU");
7124
7125 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7126 ImmTy = AMDGPUOperand::ImmTyR128A16;
7127
7128 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7129 return ParseStatus::Success;
7130}
7131
7132unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7133 bool &Disabling) const {
7134 Disabling = Id.consume_front("no");
7135
7136 if (isGFX940() && !Mnemo.starts_with("s_")) {
7137 return StringSwitch<unsigned>(Id)
7138 .Case("nt", AMDGPU::CPol::NT)
7139 .Case("sc0", AMDGPU::CPol::SC0)
7140 .Case("sc1", AMDGPU::CPol::SC1)
7141 .Default(0);
7142 }
7143
7144 return StringSwitch<unsigned>(Id)
7145 .Case("dlc", AMDGPU::CPol::DLC)
7146 .Case("glc", AMDGPU::CPol::GLC)
7147 .Case("scc", AMDGPU::CPol::SCC)
7148 .Case("slc", AMDGPU::CPol::SLC)
7149 .Default(0);
7150}
7151
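// Cache-policy operands. Illustrative examples of the surface syntax handled
// below: pre-GFX12 modifiers such as "glc slc dlc", and the GFX12+ form
// "th:TH_LOAD_NT scope:SCOPE_SYS" (examples only; availability is validated
// per target).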
7152ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7153 if (isGFX12Plus()) {
7154 SMLoc StringLoc = getLoc();
7155
7156 int64_t CPolVal = 0;
7157 ParseStatus ResTH = ParseStatus::NoMatch;
7158 ParseStatus ResScope = ParseStatus::NoMatch;
7159 ParseStatus ResNV = ParseStatus::NoMatch;
7160 ParseStatus ResScal = ParseStatus::NoMatch;
7161
7162 for (;;) {
7163 if (ResTH.isNoMatch()) {
7164 int64_t TH;
7165 ResTH = parseTH(Operands, TH);
7166 if (ResTH.isFailure())
7167 return ResTH;
7168 if (ResTH.isSuccess()) {
7169 CPolVal |= TH;
7170 continue;
7171 }
7172 }
7173
7174 if (ResScope.isNoMatch()) {
7175 int64_t Scope;
7176 ResScope = parseScope(Operands, Scope);
7177 if (ResScope.isFailure())
7178 return ResScope;
7179 if (ResScope.isSuccess()) {
7180 CPolVal |= Scope;
7181 continue;
7182 }
7183 }
7184
7185 // NV bit exists on GFX12+, but does something starting from GFX1250.
7186 // Allow parsing on all GFX12 and fail on validation for better
7187 // diagnostics.
7188 if (ResNV.isNoMatch()) {
7189 if (trySkipId("nv")) {
7190 ResNV = ParseStatus::Success;
7191 CPolVal |= CPol::NV;
7192 continue;
7193 } else if (trySkipId("no", "nv")) {
7194 ResNV = ParseStatus::Success;
7195 continue;
7196 }
7197 }
7198
7199 if (ResScal.isNoMatch()) {
7200 if (trySkipId("scale_offset")) {
7201 ResScal = ParseStatus::Success;
7202 CPolVal |= CPol::SCAL;
7203 continue;
7204 } else if (trySkipId("no", "scale_offset")) {
7205 ResScal = ParseStatus::Success;
7206 continue;
7207 }
7208 }
7209
7210 break;
7211 }
7212
7213 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7214 ResScal.isNoMatch())
7215 return ParseStatus::NoMatch;
7216
7217 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7218 AMDGPUOperand::ImmTyCPol));
7219 return ParseStatus::Success;
7220 }
7221
7222 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7223 SMLoc OpLoc = getLoc();
7224 unsigned Enabled = 0, Seen = 0;
7225 for (;;) {
7226 SMLoc S = getLoc();
7227 bool Disabling;
7228 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7229 if (!CPol)
7230 break;
7231
7232 lex();
7233
7234 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7235 return Error(S, "dlc modifier is not supported on this GPU");
7236
7237 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7238 return Error(S, "scc modifier is not supported on this GPU");
7239
7240 if (Seen & CPol)
7241 return Error(S, "duplicate cache policy modifier");
7242
7243 if (!Disabling)
7244 Enabled |= CPol;
7245
7246 Seen |= CPol;
7247 }
7248
7249 if (!Seen)
7250 return ParseStatus::NoMatch;
7251
7252 Operands.push_back(
7253 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7254 return ParseStatus::Success;
7255}
7256
7257ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7258 int64_t &Scope) {
7259 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7260 CPol::SCOPE_DEV, CPol::SCOPE_SYS};
7261
7262 ParseStatus Res = parseStringOrIntWithPrefix(
7263 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7264 Scope);
7265
7266 if (Res.isSuccess())
7267 Scope = Scopes[Scope];
7268
7269 return Res;
7270}
7271
7272ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7273 TH = AMDGPU::CPol::TH_RT; // default
7274
7275 StringRef Value;
7276 SMLoc StringLoc;
7277 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7278 if (!Res.isSuccess())
7279 return Res;
7280
7281 if (Value == "TH_DEFAULT")
7282 TH = AMDGPU::CPol::TH_RT;
7283 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7284 Value == "TH_LOAD_NT_WB") {
7285 return Error(StringLoc, "invalid th value");
7286 } else if (Value.consume_front("TH_ATOMIC_")) {
7287 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
7288 } else if (Value.consume_front("TH_LOAD_")) {
7289 TH = AMDGPU::CPol::TH_TYPE_LOAD;
7290 } else if (Value.consume_front("TH_STORE_")) {
7291 TH = AMDGPU::CPol::TH_TYPE_STORE;
7292 } else {
7293 return Error(StringLoc, "invalid th value");
7294 }
7295
7296 if (Value == "BYPASS")
7297 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
7298
7299 if (TH != 0) {
7300 if (TH == AMDGPU::CPol::TH_TYPE_ATOMIC)
7301 TH |= StringSwitch<int64_t>(Value)
7302 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7303 .Case("RT", AMDGPU::CPol::TH_RT)
7304 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7305 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7306 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7307 AMDGPU::CPol::TH_ATOMIC_RETURN)
7308 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7309 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7310 AMDGPU::CPol::TH_ATOMIC_NT)
7311 .Default(0xffffffff);
7312 else
7313 TH |= StringSwitch<int64_t>(Value)
7314 .Case("RT", AMDGPU::CPol::TH_RT)
7315 .Case("NT", AMDGPU::CPol::TH_NT)
7316 .Case("HT", AMDGPU::CPol::TH_HT)
7317 .Case("LU", AMDGPU::CPol::TH_LU)
7318 .Case("WB", AMDGPU::CPol::TH_WB)
7319 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7320 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7321 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7322 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7323 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7324 .Default(0xffffffff);
7325 }
7326
7327 if (TH == 0xffffffff)
7328 return Error(StringLoc, "invalid th value");
7329
7330 return ParseStatus::Success;
7331}
7332
7333static void
7334 addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
7335 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7336 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7337 std::optional<unsigned> InsertAt = std::nullopt) {
7338 auto i = OptionalIdx.find(ImmT);
7339 if (i != OptionalIdx.end()) {
7340 unsigned Idx = i->second;
7341 const AMDGPUOperand &Op =
7342 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7343 if (InsertAt)
7344 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7345 else
7346 Op.addImmOperands(Inst, 1);
7347 } else {
7348 if (InsertAt.has_value())
7349 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7350 else
7351 Inst.addOperand(MCOperand::createImm(Default));
7352 }
7353}
7354
7355ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7356 StringRef &Value,
7357 SMLoc &StringLoc) {
7358 if (!trySkipId(Prefix, AsmToken::Colon))
7359 return ParseStatus::NoMatch;
7360
7361 StringLoc = getLoc();
7362 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7363 : ParseStatus::Failure;
7364}
7365
7366ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7367 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7368 int64_t &IntVal) {
7369 if (!trySkipId(Name, AsmToken::Colon))
7370 return ParseStatus::NoMatch;
7371
7372 SMLoc StringLoc = getLoc();
7373
7374 StringRef Value;
7375 if (isToken(AsmToken::Identifier)) {
7376 Value = getTokenStr();
7377 lex();
7378
7379 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7380 if (Value == Ids[IntVal])
7381 break;
7382 } else if (!parseExpr(IntVal))
7383 return ParseStatus::Failure;
7384
7385 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7386 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7387
7388 return ParseStatus::Success;
7389}
7390
7391ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7392 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7393 AMDGPUOperand::ImmTy Type) {
7394 SMLoc S = getLoc();
7395 int64_t IntVal;
7396
7397 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7398 if (Res.isSuccess())
7399 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7400
7401 return Res;
7402}
7403
7404//===----------------------------------------------------------------------===//
7405// MTBUF format
7406//===----------------------------------------------------------------------===//
7407
7408bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7409 int64_t MaxVal,
7410 int64_t &Fmt) {
7411 int64_t Val;
7412 SMLoc Loc = getLoc();
7413
7414 auto Res = parseIntWithPrefix(Pref, Val);
7415 if (Res.isFailure())
7416 return false;
7417 if (Res.isNoMatch())
7418 return true;
7419
7420 if (Val < 0 || Val > MaxVal) {
7421 Error(Loc, Twine("out of range ", StringRef(Pref)));
7422 return false;
7423 }
7424
7425 Fmt = Val;
7426 return true;
7427}
7428
7429ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7430 AMDGPUOperand::ImmTy ImmTy) {
7431 const char *Pref = "index_key";
7432 int64_t ImmVal = 0;
7433 SMLoc Loc = getLoc();
7434 auto Res = parseIntWithPrefix(Pref, ImmVal);
7435 if (!Res.isSuccess())
7436 return Res;
7437
7438 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7439 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7440 (ImmVal < 0 || ImmVal > 1))
7441 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7442
7443 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7444 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7445
7446 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7447 return ParseStatus::Success;
7448}
7449
7450ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7451 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7452}
7453
7454ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7455 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7456}
7457
7458ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7459 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7460}
7461
7462ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7463 StringRef Name,
7464 AMDGPUOperand::ImmTy Type) {
7465 return parseStringOrIntWithPrefix(Operands, Name,
7466 {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
7467 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
7468 "MATRIX_FMT_FP4"},
7469 Type);
7470}
7471
7472ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7473 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7474 AMDGPUOperand::ImmTyMatrixAFMT);
7475}
7476
7477ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7478 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7479 AMDGPUOperand::ImmTyMatrixBFMT);
7480}
7481
7482ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7483 StringRef Name,
7484 AMDGPUOperand::ImmTy Type) {
7485 return parseStringOrIntWithPrefix(
7486 Operands, Name, {"MATRIX_SCALE_ROW0", "MATRIX_SCALE_ROW1"}, Type);
7487}
7488
7489ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7490 return tryParseMatrixScale(Operands, "matrix_a_scale",
7491 AMDGPUOperand::ImmTyMatrixAScale);
7492}
7493
7494ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7495 return tryParseMatrixScale(Operands, "matrix_b_scale",
7496 AMDGPUOperand::ImmTyMatrixBScale);
7497}
7498
7499ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7500 StringRef Name,
7501 AMDGPUOperand::ImmTy Type) {
7502 return parseStringOrIntWithPrefix(
7503 Operands, Name,
7504 {"MATRIX_SCALE_FMT_E8", "MATRIX_SCALE_FMT_E5M3", "MATRIX_SCALE_FMT_E4M3"},
7505 Type);
7506}
7507
7508ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7509 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7510 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7511}
7512
7513ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7514 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7515 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7516}
7517
7518// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7519// values to live in a joint format operand in the MCInst encoding.
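// Illustrative example: "format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]"
// (split dfmt/nfmt form) versus the GFX10+ unified "format:[BUF_FMT_32_FLOAT]".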
7520ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7521 using namespace llvm::AMDGPU::MTBUFFormat;
7522
7523 int64_t Dfmt = DFMT_UNDEF;
7524 int64_t Nfmt = NFMT_UNDEF;
7525
7526 // dfmt and nfmt can appear in either order, and each is optional.
7527 for (int I = 0; I < 2; ++I) {
7528 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7529 return ParseStatus::Failure;
7530
7531 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7532 return ParseStatus::Failure;
7533
7534 // Skip optional comma between dfmt/nfmt
7535 // but guard against 2 commas following each other.
7536 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7537 !peekToken().is(AsmToken::Comma)) {
7538 trySkipToken(AsmToken::Comma);
7539 }
7540 }
7541
7542 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7543 return ParseStatus::NoMatch;
7544
7545 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7546 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7547
7548 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7549 return ParseStatus::Success;
7550}
7551
7552ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7553 using namespace llvm::AMDGPU::MTBUFFormat;
7554
7555 int64_t Fmt = UFMT_UNDEF;
7556
7557 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7558 return ParseStatus::Failure;
7559
7560 if (Fmt == UFMT_UNDEF)
7561 return ParseStatus::NoMatch;
7562
7563 Format = Fmt;
7564 return ParseStatus::Success;
7565}
7566
7567bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7568 int64_t &Nfmt,
7569 StringRef FormatStr,
7570 SMLoc Loc) {
7571 using namespace llvm::AMDGPU::MTBUFFormat;
7572 int64_t Format;
7573
7574 Format = getDfmt(FormatStr);
7575 if (Format != DFMT_UNDEF) {
7576 Dfmt = Format;
7577 return true;
7578 }
7579
7580 Format = getNfmt(FormatStr, getSTI());
7581 if (Format != NFMT_UNDEF) {
7582 Nfmt = Format;
7583 return true;
7584 }
7585
7586 Error(Loc, "unsupported format");
7587 return false;
7588}
7589
7590ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7591 SMLoc FormatLoc,
7592 int64_t &Format) {
7593 using namespace llvm::AMDGPU::MTBUFFormat;
7594
7595 int64_t Dfmt = DFMT_UNDEF;
7596 int64_t Nfmt = NFMT_UNDEF;
7597 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7598 return ParseStatus::Failure;
7599
7600 if (trySkipToken(AsmToken::Comma)) {
7601 StringRef Str;
7602 SMLoc Loc = getLoc();
7603 if (!parseId(Str, "expected a format string") ||
7604 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7605 return ParseStatus::Failure;
7606 if (Dfmt == DFMT_UNDEF)
7607 return Error(Loc, "duplicate numeric format");
7608 if (Nfmt == NFMT_UNDEF)
7609 return Error(Loc, "duplicate data format");
7610 }
7611
7612 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7613 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7614
7615 if (isGFX10Plus()) {
7616 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7617 if (Ufmt == UFMT_UNDEF)
7618 return Error(FormatLoc, "unsupported format");
7619 Format = Ufmt;
7620 } else {
7621 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7622 }
7623
7624 return ParseStatus::Success;
7625}
7626
7627ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7628 SMLoc Loc,
7629 int64_t &Format) {
7630 using namespace llvm::AMDGPU::MTBUFFormat;
7631
7632 auto Id = getUnifiedFormat(FormatStr, getSTI());
7633 if (Id == UFMT_UNDEF)
7634 return ParseStatus::NoMatch;
7635
7636 if (!isGFX10Plus())
7637 return Error(Loc, "unified format is not supported on this GPU");
7638
7639 Format = Id;
7640 return ParseStatus::Success;
7641}
7642
7643ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7644 using namespace llvm::AMDGPU::MTBUFFormat;
7645 SMLoc Loc = getLoc();
7646
7647 if (!parseExpr(Format))
7648 return ParseStatus::Failure;
7649 if (!isValidFormatEncoding(Format, getSTI()))
7650 return Error(Loc, "out of range format");
7651
7652 return ParseStatus::Success;
7653}
7654
7655ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7656 using namespace llvm::AMDGPU::MTBUFFormat;
7657
7658 if (!trySkipId("format", AsmToken::Colon))
7659 return ParseStatus::NoMatch;
7660
7661 if (trySkipToken(AsmToken::LBrac)) {
7662 StringRef FormatStr;
7663 SMLoc Loc = getLoc();
7664 if (!parseId(FormatStr, "expected a format string"))
7665 return ParseStatus::Failure;
7666
7667 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7668 if (Res.isNoMatch())
7669 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7670 if (!Res.isSuccess())
7671 return Res;
7672
7673 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7674 return ParseStatus::Failure;
7675
7676 return ParseStatus::Success;
7677 }
7678
7679 return parseNumericFormat(Format);
7680}
7681
7682ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7683 using namespace llvm::AMDGPU::MTBUFFormat;
7684
7685 int64_t Format = getDefaultFormatEncoding(getSTI());
7686 ParseStatus Res;
7687 SMLoc Loc = getLoc();
7688
7689 // Parse legacy format syntax.
7690 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7691 if (Res.isFailure())
7692 return Res;
7693
7694 bool FormatFound = Res.isSuccess();
7695
7696 Operands.push_back(
7697 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7698
7699 if (FormatFound)
7700 trySkipToken(AsmToken::Comma);
7701
7702 if (isToken(AsmToken::EndOfStatement)) {
7703 // We are expecting an soffset operand,
7704 // but let matcher handle the error.
7705 return ParseStatus::Success;
7706 }
7707
7708 // Parse soffset.
7709 Res = parseRegOrImm(Operands);
7710 if (!Res.isSuccess())
7711 return Res;
7712
7713 trySkipToken(AsmToken::Comma);
7714
7715 if (!FormatFound) {
7716 Res = parseSymbolicOrNumericFormat(Format);
7717 if (Res.isFailure())
7718 return Res;
7719 if (Res.isSuccess()) {
7720 auto Size = Operands.size();
7721 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7722 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7723 Op.setImm(Format);
7724 }
7725 return ParseStatus::Success;
7726 }
7727
7728 if (isId("format") && peekToken().is(AsmToken::Colon))
7729 return Error(getLoc(), "duplicate format");
7730 return ParseStatus::Success;
7731}
7732
7733ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7734 ParseStatus Res =
7735 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7736 if (Res.isNoMatch()) {
7737 Res = parseIntWithPrefix("inst_offset", Operands,
7738 AMDGPUOperand::ImmTyInstOffset);
7739 }
7740 return Res;
7741}
7742
7743ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7744 ParseStatus Res =
7745 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7746 if (Res.isNoMatch())
7747 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7748 return Res;
7749}
7750
7751ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7752 ParseStatus Res =
7753 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7754 if (Res.isNoMatch()) {
7755 Res =
7756 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7757 }
7758 return Res;
7759}
7760
7761//===----------------------------------------------------------------------===//
7762// Exp
7763//===----------------------------------------------------------------------===//
7764
7765void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7766 OptionalImmIndexMap OptionalIdx;
7767
7768 unsigned OperandIdx[4];
7769 unsigned EnMask = 0;
7770 int SrcIdx = 0;
7771
7772 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7773 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7774
7775 // Add the register arguments
7776 if (Op.isReg()) {
7777 assert(SrcIdx < 4);
7778 OperandIdx[SrcIdx] = Inst.size();
7779 Op.addRegOperands(Inst, 1);
7780 ++SrcIdx;
7781 continue;
7782 }
7783
7784 if (Op.isOff()) {
7785 assert(SrcIdx < 4);
7786 OperandIdx[SrcIdx] = Inst.size();
7787 Inst.addOperand(MCOperand::createReg(MCRegister()));
7788 ++SrcIdx;
7789 continue;
7790 }
7791
7792 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7793 Op.addImmOperands(Inst, 1);
7794 continue;
7795 }
7796
7797 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7798 continue;
7799
7800 // Handle optional arguments
7801 OptionalIdx[Op.getImmTy()] = i;
7802 }
7803
7804 assert(SrcIdx == 4);
7805
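// Note: with the "compr" modifier the export carries packed data, so the
// register operands and the enable mask computed below differ from the
// uncompressed case (pairs of components are enabled per register).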
7806 bool Compr = false;
7807 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7808 Compr = true;
7809 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7810 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7811 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7812 }
7813
7814 for (auto i = 0; i < SrcIdx; ++i) {
7815 if (Inst.getOperand(OperandIdx[i]).getReg()) {
7816 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7817 }
7818 }
7819
7820 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7821 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7822
7823 Inst.addOperand(MCOperand::createImm(EnMask));
7824}
7825
7826//===----------------------------------------------------------------------===//
7827// s_waitcnt
7828//===----------------------------------------------------------------------===//
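// Illustrative example of the syntax handled in this section:
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
// Counter names with a "_sat" suffix clamp an out-of-range value instead of
// reporting an error (see parseCnt below).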
7829
7830static bool
7831encodeCnt(
7832 const AMDGPU::IsaVersion ISA,
7833 int64_t &IntVal,
7834 int64_t CntVal,
7835 bool Saturate,
7836 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7837 unsigned (*decode)(const IsaVersion &Version, unsigned))
7838{
7839 bool Failed = false;
7840
7841 IntVal = encode(ISA, IntVal, CntVal);
7842 if (CntVal != decode(ISA, IntVal)) {
7843 if (Saturate) {
7844 IntVal = encode(ISA, IntVal, -1);
7845 } else {
7846 Failed = true;
7847 }
7848 }
7849 return Failed;
7850}
7851
7852bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7853
7854 SMLoc CntLoc = getLoc();
7855 StringRef CntName = getTokenStr();
7856
7857 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7858 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7859 return false;
7860
7861 int64_t CntVal;
7862 SMLoc ValLoc = getLoc();
7863 if (!parseExpr(CntVal))
7864 return false;
7865
7866 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7867
7868 bool Failed = true;
7869 bool Sat = CntName.ends_with("_sat");
7870
7871 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7872 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7873 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7874 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7875 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7876 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7877 } else {
7878 Error(CntLoc, "invalid counter name " + CntName);
7879 return false;
7880 }
7881
7882 if (Failed) {
7883 Error(ValLoc, "too large value for " + CntName);
7884 return false;
7885 }
7886
7887 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7888 return false;
7889
7890 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7891 if (isToken(AsmToken::EndOfStatement)) {
7892 Error(getLoc(), "expected a counter name");
7893 return false;
7894 }
7895 }
7896
7897 return true;
7898}
7899
7900ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7901 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7902 int64_t Waitcnt = getWaitcntBitMask(ISA);
7903 SMLoc S = getLoc();
7904
7905 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7906 while (!isToken(AsmToken::EndOfStatement)) {
7907 if (!parseCnt(Waitcnt))
7908 return ParseStatus::Failure;
7909 }
7910 } else {
7911 if (!parseExpr(Waitcnt))
7912 return ParseStatus::Failure;
7913 }
7914
7915 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7916 return ParseStatus::Success;
7917}
7918
7919bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7920 SMLoc FieldLoc = getLoc();
7921 StringRef FieldName = getTokenStr();
7922 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7923 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7924 return false;
7925
7926 SMLoc ValueLoc = getLoc();
7927 StringRef ValueName = getTokenStr();
7928 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7929 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7930 return false;
7931
7932 unsigned Shift;
7933 if (FieldName == "instid0") {
7934 Shift = 0;
7935 } else if (FieldName == "instskip") {
7936 Shift = 4;
7937 } else if (FieldName == "instid1") {
7938 Shift = 7;
7939 } else {
7940 Error(FieldLoc, "invalid field name " + FieldName);
7941 return false;
7942 }
7943
7944 int Value;
7945 if (Shift == 4) {
7946 // Parse values for instskip.
7947 Value = StringSwitch<int>(ValueName)
7948 .Case("SAME", 0)
7949 .Case("NEXT", 1)
7950 .Case("SKIP_1", 2)
7951 .Case("SKIP_2", 3)
7952 .Case("SKIP_3", 4)
7953 .Case("SKIP_4", 5)
7954 .Default(-1);
7955 } else {
7956 // Parse values for instid0 and instid1.
7957 Value = StringSwitch<int>(ValueName)
7958 .Case("NO_DEP", 0)
7959 .Case("VALU_DEP_1", 1)
7960 .Case("VALU_DEP_2", 2)
7961 .Case("VALU_DEP_3", 3)
7962 .Case("VALU_DEP_4", 4)
7963 .Case("TRANS32_DEP_1", 5)
7964 .Case("TRANS32_DEP_2", 6)
7965 .Case("TRANS32_DEP_3", 7)
7966 .Case("FMA_ACCUM_CYCLE_1", 8)
7967 .Case("SALU_CYCLE_1", 9)
7968 .Case("SALU_CYCLE_2", 10)
7969 .Case("SALU_CYCLE_3", 11)
7970 .Default(-1);
7971 }
7972 if (Value < 0) {
7973 Error(ValueLoc, "invalid value name " + ValueName);
7974 return false;
7975 }
7976
7977 Delay |= Value << Shift;
7978 return true;
7979}
7980
7981ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7982 int64_t Delay = 0;
7983 SMLoc S = getLoc();
7984
7985 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7986 do {
7987 if (!parseDelay(Delay))
7988 return ParseStatus::Failure;
7989 } while (trySkipToken(AsmToken::Pipe));
7990 } else {
7991 if (!parseExpr(Delay))
7992 return ParseStatus::Failure;
7993 }
7994
7995 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7996 return ParseStatus::Success;
7997}
7998
7999bool
8000AMDGPUOperand::isSWaitCnt() const {
8001 return isImm();
8002}
8003
8004bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
8005
8006//===----------------------------------------------------------------------===//
8007// DepCtr
8008//===----------------------------------------------------------------------===//
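// Illustrative example of the syntax handled in this section:
//   s_waitcnt_depctr depctr_va_vdst(0) & depctr_vm_vsrc(0)
// (counter names are target-dependent; see encodeDepCtr).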
8009
8010void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
8011 StringRef DepCtrName) {
8012 switch (ErrorId) {
8013 case OPR_ID_UNKNOWN:
8014 Error(Loc, Twine("invalid counter name ", DepCtrName));
8015 return;
8016 case OPR_ID_UNSUPPORTED:
8017 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
8018 return;
8019 case OPR_ID_DUPLICATE:
8020 Error(Loc, Twine("duplicate counter name ", DepCtrName));
8021 return;
8022 case OPR_VAL_INVALID:
8023 Error(Loc, Twine("invalid value for ", DepCtrName));
8024 return;
8025 default:
8026 assert(false);
8027 }
8028}
8029
8030bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
8031
8032 using namespace llvm::AMDGPU::DepCtr;
8033
8034 SMLoc DepCtrLoc = getLoc();
8035 StringRef DepCtrName = getTokenStr();
8036
8037 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
8038 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8039 return false;
8040
8041 int64_t ExprVal;
8042 if (!parseExpr(ExprVal))
8043 return false;
8044
8045 unsigned PrevOprMask = UsedOprMask;
8046 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
8047
8048 if (CntVal < 0) {
8049 depCtrError(DepCtrLoc, CntVal, DepCtrName);
8050 return false;
8051 }
8052
8053 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8054 return false;
8055
8056 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
8057 if (isToken(AsmToken::EndOfStatement)) {
8058 Error(getLoc(), "expected a counter name");
8059 return false;
8060 }
8061 }
8062
8063 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8064 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8065 return true;
8066}
8067
8068ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8069 using namespace llvm::AMDGPU::DepCtr;
8070
8071 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
8072 SMLoc Loc = getLoc();
8073
8074 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8075 unsigned UsedOprMask = 0;
8076 while (!isToken(AsmToken::EndOfStatement)) {
8077 if (!parseDepCtr(DepCtr, UsedOprMask))
8078 return ParseStatus::Failure;
8079 }
8080 } else {
8081 if (!parseExpr(DepCtr))
8082 return ParseStatus::Failure;
8083 }
8084
8085 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8086 return ParseStatus::Success;
8087}
8088
8089bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8090
8091//===----------------------------------------------------------------------===//
8092// hwreg
8093//===----------------------------------------------------------------------===//
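// Illustrative example of the syntax handled in this section:
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)
// A structured form such as {id: 1, offset: 0, size: 32} and a plain 16-bit
// immediate are also accepted (see parseHwreg below).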
8094
8095ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8096 OperandInfoTy &Offset,
8097 OperandInfoTy &Width) {
8098 using namespace llvm::AMDGPU::Hwreg;
8099
8100 if (!trySkipId("hwreg", AsmToken::LParen))
8101 return ParseStatus::NoMatch;
8102
8103 // The register may be specified by name or using a numeric code
8104 HwReg.Loc = getLoc();
8105 if (isToken(AsmToken::Identifier) &&
8106 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8107 HwReg.IsSymbolic = true;
8108 lex(); // skip register name
8109 } else if (!parseExpr(HwReg.Val, "a register name")) {
8110 return ParseStatus::Failure;
8111 }
8112
8113 if (trySkipToken(AsmToken::RParen))
8114 return ParseStatus::Success;
8115
8116 // parse optional params
8117 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8118 return ParseStatus::Failure;
8119
8120 Offset.Loc = getLoc();
8121 if (!parseExpr(Offset.Val))
8122 return ParseStatus::Failure;
8123
8124 if (!skipToken(AsmToken::Comma, "expected a comma"))
8125 return ParseStatus::Failure;
8126
8127 Width.Loc = getLoc();
8128 if (!parseExpr(Width.Val) ||
8129 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8130 return ParseStatus::Failure;
8131
8132 return ParseStatus::Success;
8133}
8134
8135ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8136 using namespace llvm::AMDGPU::Hwreg;
8137
8138 int64_t ImmVal = 0;
8139 SMLoc Loc = getLoc();
8140
8141 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8142 HwregId::Default);
8143 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8144 HwregOffset::Default);
8145 struct : StructuredOpField {
8146 using StructuredOpField::StructuredOpField;
8147 bool validate(AMDGPUAsmParser &Parser) const override {
8148 if (!isUIntN(Width, Val - 1))
8149 return Error(Parser, "only values from 1 to 32 are legal");
8150 return true;
8151 }
8152 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8153 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8154
8155 if (Res.isNoMatch())
8156 Res = parseHwregFunc(HwReg, Offset, Width);
8157
8158 if (Res.isSuccess()) {
8159 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8160 return ParseStatus::Failure;
8161 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8162 }
8163
8164 if (Res.isNoMatch() &&
8165 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8166 Res = ParseStatus::Success;
8167
8168 if (!Res.isSuccess())
8169 return ParseStatus::Failure;
8170
8171 if (!isUInt<16>(ImmVal))
8172 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8173 Operands.push_back(
8174 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8175 return ParseStatus::Success;
8176}
8177
8178bool AMDGPUOperand::isHwreg() const {
8179 return isImmTy(ImmTyHwreg);
8180}
8181
8182//===----------------------------------------------------------------------===//
8183// sendmsg
8184//===----------------------------------------------------------------------===//
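// Illustrative example of the syntax handled in this section:
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// A plain 16-bit immediate is also accepted.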
8185
8186bool
8187AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8188 OperandInfoTy &Op,
8189 OperandInfoTy &Stream) {
8190 using namespace llvm::AMDGPU::SendMsg;
8191
8192 Msg.Loc = getLoc();
8193 if (isToken(AsmToken::Identifier) &&
8194 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8195 Msg.IsSymbolic = true;
8196 lex(); // skip message name
8197 } else if (!parseExpr(Msg.Val, "a message name")) {
8198 return false;
8199 }
8200
8201 if (trySkipToken(AsmToken::Comma)) {
8202 Op.IsDefined = true;
8203 Op.Loc = getLoc();
8204 if (isToken(AsmToken::Identifier) &&
8205 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8206 OPR_ID_UNKNOWN) {
8207 lex(); // skip operation name
8208 } else if (!parseExpr(Op.Val, "an operation name")) {
8209 return false;
8210 }
8211
8212 if (trySkipToken(AsmToken::Comma)) {
8213 Stream.IsDefined = true;
8214 Stream.Loc = getLoc();
8215 if (!parseExpr(Stream.Val))
8216 return false;
8217 }
8218 }
8219
8220 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8221}
8222
8223bool
8224AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8225 const OperandInfoTy &Op,
8226 const OperandInfoTy &Stream) {
8227 using namespace llvm::AMDGPU::SendMsg;
8228
8229 // Validation strictness depends on whether message is specified
8230 // in a symbolic or in a numeric form. In the latter case
8231 // only encoding possibility is checked.
8232 bool Strict = Msg.IsSymbolic;
8233
8234 if (Strict) {
8235 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8236 Error(Msg.Loc, "specified message id is not supported on this GPU");
8237 return false;
8238 }
8239 } else {
8240 if (!isValidMsgId(Msg.Val, getSTI())) {
8241 Error(Msg.Loc, "invalid message id");
8242 return false;
8243 }
8244 }
8245 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8246 if (Op.IsDefined) {
8247 Error(Op.Loc, "message does not support operations");
8248 } else {
8249 Error(Msg.Loc, "missing message operation");
8250 }
8251 return false;
8252 }
8253 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8254 if (Op.Val == OPR_ID_UNSUPPORTED)
8255 Error(Op.Loc, "specified operation id is not supported on this GPU");
8256 else
8257 Error(Op.Loc, "invalid operation id");
8258 return false;
8259 }
8260 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8261 Stream.IsDefined) {
8262 Error(Stream.Loc, "message operation does not support streams");
8263 return false;
8264 }
8265 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8266 Error(Stream.Loc, "invalid message stream id");
8267 return false;
8268 }
8269 return true;
8270}
8271
8272ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8273 using namespace llvm::AMDGPU::SendMsg;
8274
8275 int64_t ImmVal = 0;
8276 SMLoc Loc = getLoc();
8277
8278 if (trySkipId("sendmsg", AsmToken::LParen)) {
8279 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8280 OperandInfoTy Op(OP_NONE_);
8281 OperandInfoTy Stream(STREAM_ID_NONE_);
8282 if (parseSendMsgBody(Msg, Op, Stream) &&
8283 validateSendMsg(Msg, Op, Stream)) {
8284 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8285 } else {
8286 return ParseStatus::Failure;
8287 }
8288 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8289 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8290 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8291 } else {
8292 return ParseStatus::Failure;
8293 }
8294
8295 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8296 return ParseStatus::Success;
8297}
8298
8299bool AMDGPUOperand::isSendMsg() const {
8300 return isImmTy(ImmTySendMsg);
8301}
8302
8303//===----------------------------------------------------------------------===//
8304// v_interp
8305//===----------------------------------------------------------------------===//
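// Illustrative example of the syntax handled in this section:
//   v_interp_p1_f32 v0, v1, attr0.x
// (slot names p10/p20/p0 and attribute channels .x/.y/.z/.w, per the parsers
// below).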
8306
8307ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8308 StringRef Str;
8309 SMLoc S = getLoc();
8310
8311 if (!parseId(Str))
8312 return ParseStatus::NoMatch;
8313
8314 int Slot = StringSwitch<int>(Str)
8315 .Case("p10", 0)
8316 .Case("p20", 1)
8317 .Case("p0", 2)
8318 .Default(-1);
8319
8320 if (Slot == -1)
8321 return Error(S, "invalid interpolation slot");
8322
8323 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8324 AMDGPUOperand::ImmTyInterpSlot));
8325 return ParseStatus::Success;
8326}
8327
8328ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8329 StringRef Str;
8330 SMLoc S = getLoc();
8331
8332 if (!parseId(Str))
8333 return ParseStatus::NoMatch;
8334
8335 if (!Str.starts_with("attr"))
8336 return Error(S, "invalid interpolation attribute");
8337
8338 StringRef Chan = Str.take_back(2);
8339 int AttrChan = StringSwitch<int>(Chan)
8340 .Case(".x", 0)
8341 .Case(".y", 1)
8342 .Case(".z", 2)
8343 .Case(".w", 3)
8344 .Default(-1);
8345 if (AttrChan == -1)
8346 return Error(S, "invalid or missing interpolation attribute channel");
8347
8348 Str = Str.drop_back(2).drop_front(4);
8349
8350 uint8_t Attr;
8351 if (Str.getAsInteger(10, Attr))
8352 return Error(S, "invalid or missing interpolation attribute number");
8353
8354 if (Attr > 32)
8355 return Error(S, "out of bounds interpolation attribute number");
8356
8357 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8358
8359 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8360 AMDGPUOperand::ImmTyInterpAttr));
8361 Operands.push_back(AMDGPUOperand::CreateImm(
8362 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8363 return ParseStatus::Success;
8364}
8365
8366//===----------------------------------------------------------------------===//
8367// exp
8368//===----------------------------------------------------------------------===//
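// Illustrative example of the syntax handled in this section:
//   exp mrt0 v0, v1, v2, v3 done vm
// where the target may also be, e.g., mrtz, null, pos0..3 or param0..31.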
8369
8370ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8371 using namespace llvm::AMDGPU::Exp;
8372
8373 StringRef Str;
8374 SMLoc S = getLoc();
8375
8376 if (!parseId(Str))
8377 return ParseStatus::NoMatch;
8378
8379 unsigned Id = getTgtId(Str);
8380 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8381 return Error(S, (Id == ET_INVALID)
8382 ? "invalid exp target"
8383 : "exp target is not supported on this GPU");
8384
8385 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8386 AMDGPUOperand::ImmTyExpTgt));
8387 return ParseStatus::Success;
8388}
8389
8390//===----------------------------------------------------------------------===//
8391// parser helpers
8392//===----------------------------------------------------------------------===//
8393
8394bool
8395AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8396 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8397}
8398
8399bool
8400AMDGPUAsmParser::isId(const StringRef Id) const {
8401 return isId(getToken(), Id);
8402}
8403
8404bool
8405AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8406 return getTokenKind() == Kind;
8407}
8408
8409StringRef AMDGPUAsmParser::getId() const {
8410 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8411}
8412
8413bool
8414AMDGPUAsmParser::trySkipId(const StringRef Id) {
8415 if (isId(Id)) {
8416 lex();
8417 return true;
8418 }
8419 return false;
8420}
8421
8422bool
8423AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8424 if (isToken(AsmToken::Identifier)) {
8425 StringRef Tok = getTokenStr();
8426 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8427 lex();
8428 return true;
8429 }
8430 }
8431 return false;
8432}
8433
8434bool
8435AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8436 if (isId(Id) && peekToken().is(Kind)) {
8437 lex();
8438 lex();
8439 return true;
8440 }
8441 return false;
8442}
8443
8444bool
8445AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8446 if (isToken(Kind)) {
8447 lex();
8448 return true;
8449 }
8450 return false;
8451}
8452
8453bool
8454AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8455 const StringRef ErrMsg) {
8456 if (!trySkipToken(Kind)) {
8457 Error(getLoc(), ErrMsg);
8458 return false;
8459 }
8460 return true;
8461}
8462
8463bool
8464AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8465 SMLoc S = getLoc();
8466
8467 const MCExpr *Expr;
8468 if (Parser.parseExpression(Expr))
8469 return false;
8470
8471 if (Expr->evaluateAsAbsolute(Imm))
8472 return true;
8473
8474 if (Expected.empty()) {
8475 Error(S, "expected absolute expression");
8476 } else {
8477 Error(S, Twine("expected ", Expected) +
8478 Twine(" or an absolute expression"));
8479 }
8480 return false;
8481}
8482
8483bool
8484AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8485 SMLoc S = getLoc();
8486
8487 const MCExpr *Expr;
8488 if (Parser.parseExpression(Expr))
8489 return false;
8490
8491 int64_t IntVal;
8492 if (Expr->evaluateAsAbsolute(IntVal)) {
8493 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8494 } else {
8495 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8496 }
8497 return true;
8498}
8499
8500bool
8501AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8502 if (isToken(AsmToken::String)) {
8503 Val = getToken().getStringContents();
8504 lex();
8505 return true;
8506 }
8507 Error(getLoc(), ErrMsg);
8508 return false;
8509}
8510
8511bool
8512AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8513 if (isToken(AsmToken::Identifier)) {
8514 Val = getTokenStr();
8515 lex();
8516 return true;
8517 }
8518 if (!ErrMsg.empty())
8519 Error(getLoc(), ErrMsg);
8520 return false;
8521}
8522
8523AsmToken
8524AMDGPUAsmParser::getToken() const {
8525 return Parser.getTok();
8526}
8527
8528AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8529 return isToken(AsmToken::EndOfStatement)
8530 ? getToken()
8531 : getLexer().peekTok(ShouldSkipSpace);
8532}
8533
8534void
8535AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8536 auto TokCount = getLexer().peekTokens(Tokens);
8537
8538 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8539 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8540}
8541
8542AsmToken::TokenKind
8543AMDGPUAsmParser::getTokenKind() const {
8544 return getLexer().getKind();
8545}
8546
8547SMLoc
8548AMDGPUAsmParser::getLoc() const {
8549 return getToken().getLoc();
8550}
8551
8552StringRef
8553AMDGPUAsmParser::getTokenStr() const {
8554 return getToken().getString();
8555}
8556
8557void
8558AMDGPUAsmParser::lex() {
8559 Parser.Lex();
8560}
8561
8562SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8563 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8564}
8565
8566SMLoc
8567AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8568 const OperandVector &Operands) const {
8569 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8570 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8571 if (Test(Op))
8572 return Op.getStartLoc();
8573 }
8574 return getInstLoc(Operands);
8575}
8576
8577SMLoc
8578AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8579 const OperandVector &Operands) const {
8580 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8581 return getOperandLoc(Test, Operands);
8582}
8583
8584SMLoc AMDGPUAsmParser::getRegLoc(MCRegister Reg,
8585 const OperandVector &Operands) const {
8586 auto Test = [=](const AMDGPUOperand& Op) {
8587 return Op.isRegKind() && Op.getReg() == Reg;
8588 };
8589 return getOperandLoc(Test, Operands);
8590}
8591
8592SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
8593 bool SearchMandatoryLiterals) const {
8594 auto Test = [](const AMDGPUOperand& Op) {
8595 return Op.IsImmKindLiteral() || Op.isExpr();
8596 };
8597 SMLoc Loc = getOperandLoc(Test, Operands);
8598 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
8599 Loc = getMandatoryLitLoc(Operands);
8600 return Loc;
8601}
8602
8603SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
8604 auto Test = [](const AMDGPUOperand &Op) {
8605 return Op.IsImmKindMandatoryLiteral();
8606 };
8607 return getOperandLoc(Test, Operands);
8608}
8609
8610SMLoc
8611AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
8612 auto Test = [](const AMDGPUOperand& Op) {
8613 return Op.isImmKindConst();
8614 };
8615 return getOperandLoc(Test, Operands);
8616}
8617
8618ParseStatus
8619AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8620 if (!trySkipToken(AsmToken::LCurly))
8621 return ParseStatus::NoMatch;
8622
8623 bool First = true;
8624 while (!trySkipToken(AsmToken::RCurly)) {
8625 if (!First &&
8626 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8627 return ParseStatus::Failure;
8628
8629 StringRef Id = getTokenStr();
8630 SMLoc IdLoc = getLoc();
8631 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8632 !skipToken(AsmToken::Colon, "colon expected"))
8633 return ParseStatus::Failure;
8634
8635 const auto *I =
8636 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8637 if (I == Fields.end())
8638 return Error(IdLoc, "unknown field");
8639 if ((*I)->IsDefined)
8640 return Error(IdLoc, "duplicate field");
8641
8642 // TODO: Support symbolic values.
8643 (*I)->Loc = getLoc();
8644 if (!parseExpr((*I)->Val))
8645 return ParseStatus::Failure;
8646 (*I)->IsDefined = true;
8647
8648 First = false;
8649 }
8650 return ParseStatus::Success;
8651}
8652
8653bool AMDGPUAsmParser::validateStructuredOpFields(
8654 ArrayRef<const StructuredOpField *> Fields) {
8655 return all_of(Fields, [this](const StructuredOpField *F) {
8656 return F->validate(*this);
8657 });
8658}
8659
8660//===----------------------------------------------------------------------===//
8661// swizzle
8662//===----------------------------------------------------------------------===//
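// Illustrative examples of the syntax handled in this section:
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v5, v1 offset:swizzle(BITMASK_PERM, "01pip")
// A plain 16-bit offset is also accepted (see parseSwizzle below).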
8663
8665static unsigned
8666encodeBitmaskPerm(const unsigned AndMask,
8667 const unsigned OrMask,
8668 const unsigned XorMask) {
8669 using namespace llvm::AMDGPU::Swizzle;
8670
8671 return BITMASK_PERM_ENC |
8672 (AndMask << BITMASK_AND_SHIFT) |
8673 (OrMask << BITMASK_OR_SHIFT) |
8674 (XorMask << BITMASK_XOR_SHIFT);
8675}
8676
8677bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8678 const unsigned MaxVal,
8679 const Twine &ErrMsg, SMLoc &Loc) {
8680 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8681 return false;
8682 }
8683 Loc = getLoc();
8684 if (!parseExpr(Op)) {
8685 return false;
8686 }
8687 if (Op < MinVal || Op > MaxVal) {
8688 Error(Loc, ErrMsg);
8689 return false;
8690 }
8691
8692 return true;
8693}
8694
8695bool
8696AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8697 const unsigned MinVal,
8698 const unsigned MaxVal,
8699 const StringRef ErrMsg) {
8700 SMLoc Loc;
8701 for (unsigned i = 0; i < OpNum; ++i) {
8702 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8703 return false;
8704 }
8705
8706 return true;
8707}
8708
8709bool
8710AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8711 using namespace llvm::AMDGPU::Swizzle;
8712
8713 int64_t Lane[LANE_NUM];
8714 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8715 "expected a 2-bit lane id")) {
8716 Imm = QUAD_PERM_ENC;
8717 for (unsigned I = 0; I < LANE_NUM; ++I) {
8718 Imm |= Lane[I] << (LANE_SHIFT * I);
8719 }
8720 return true;
8721 }
8722 return false;
8723}
8724
8725bool
8726AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8727 using namespace llvm::AMDGPU::Swizzle;
8728
8729 SMLoc Loc;
8730 int64_t GroupSize;
8731 int64_t LaneIdx;
8732
8733 if (!parseSwizzleOperand(GroupSize,
8734 2, 32,
8735 "group size must be in the interval [2,32]",
8736 Loc)) {
8737 return false;
8738 }
8739 if (!isPowerOf2_64(GroupSize)) {
8740 Error(Loc, "group size must be a power of two");
8741 return false;
8742 }
8743 if (parseSwizzleOperand(LaneIdx,
8744 0, GroupSize - 1,
8745 "lane id must be in the interval [0,group size - 1]",
8746 Loc)) {
8747 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8748 return true;
8749 }
8750 return false;
8751}
8752
8753bool
8754AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8755 using namespace llvm::AMDGPU::Swizzle;
8756
8757 SMLoc Loc;
8758 int64_t GroupSize;
8759
8760 if (!parseSwizzleOperand(GroupSize,
8761 2, 32,
8762 "group size must be in the interval [2,32]",
8763 Loc)) {
8764 return false;
8765 }
8766 if (!isPowerOf2_64(GroupSize)) {
8767 Error(Loc, "group size must be a power of two");
8768 return false;
8769 }
8770
8771 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8772 return true;
8773}
8774
8775bool
8776AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8777 using namespace llvm::AMDGPU::Swizzle;
8778
8779 SMLoc Loc;
8780 int64_t GroupSize;
8781
8782 if (!parseSwizzleOperand(GroupSize,
8783 1, 16,
8784 "group size must be in the interval [1,16]",
8785 Loc)) {
8786 return false;
8787 }
8788 if (!isPowerOf2_64(GroupSize)) {
8789 Error(Loc, "group size must be a power of two");
8790 return false;
8791 }
8792
8793 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8794 return true;
8795}
8796
8797bool
8798AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8799 using namespace llvm::AMDGPU::Swizzle;
8800
8801 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8802 return false;
8803 }
8804
8805 StringRef Ctl;
8806 SMLoc StrLoc = getLoc();
8807 if (!parseString(Ctl)) {
8808 return false;
8809 }
8810 if (Ctl.size() != BITMASK_WIDTH) {
8811 Error(StrLoc, "expected a 5-character mask");
8812 return false;
8813 }
8814
8815 unsigned AndMask = 0;
8816 unsigned OrMask = 0;
8817 unsigned XorMask = 0;
8818
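// Each of the 5 mask characters controls one lane-id bit (most significant
// first): '0' forces the bit to 0, '1' forces it to 1, 'p' preserves it and
// 'i' inverts it; this is folded into the and/or/xor masks below.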
8819 for (size_t i = 0; i < Ctl.size(); ++i) {
8820 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8821 switch(Ctl[i]) {
8822 default:
8823 Error(StrLoc, "invalid mask");
8824 return false;
8825 case '0':
8826 break;
8827 case '1':
8828 OrMask |= Mask;
8829 break;
8830 case 'p':
8831 AndMask |= Mask;
8832 break;
8833 case 'i':
8834 AndMask |= Mask;
8835 XorMask |= Mask;
8836 break;
8837 }
8838 }
8839
8840 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8841 return true;
8842}
8843
8844bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8845 using namespace llvm::AMDGPU::Swizzle;
8846
8847 if (!AMDGPU::isGFX9Plus(getSTI())) {
8848 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8849 return false;
8850 }
8851
8852 int64_t Swizzle;
8853 SMLoc Loc;
8854 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8855 "FFT swizzle must be in the interval [0," +
8856 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8857 Loc))
8858 return false;
8859
8860 Imm = FFT_MODE_ENC | Swizzle;
8861 return true;
8862}
8863
8864bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8865 using namespace llvm::AMDGPU::Swizzle;
8866
8867 if (!AMDGPU::isGFX9Plus(getSTI())) {
8868 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8869 return false;
8870 }
8871
8872 SMLoc Loc;
8873 int64_t Direction;
8874
8875 if (!parseSwizzleOperand(Direction, 0, 1,
8876 "direction must be 0 (left) or 1 (right)", Loc))
8877 return false;
8878
8879 int64_t RotateSize;
8880 if (!parseSwizzleOperand(
8881 RotateSize, 0, ROTATE_MAX_SIZE,
8882 "number of threads to rotate must be in the interval [0," +
8883 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8884 Loc))
8885 return false;
8886
8887 Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
8888 (RotateSize << ROTATE_SIZE_SHIFT);
8889 return true;
8890}
8891
8892bool
8893AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8894
8895 SMLoc OffsetLoc = getLoc();
8896
8897 if (!parseExpr(Imm, "a swizzle macro")) {
8898 return false;
8899 }
8900 if (!isUInt<16>(Imm)) {
8901 Error(OffsetLoc, "expected a 16-bit offset");
8902 return false;
8903 }
8904 return true;
8905}
8906
8907bool
8908AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8909 using namespace llvm::AMDGPU::Swizzle;
8910
8911 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
8912
8913 SMLoc ModeLoc = getLoc();
8914 bool Ok = false;
8915
8916 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8917 Ok = parseSwizzleQuadPerm(Imm);
8918 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8919 Ok = parseSwizzleBitmaskPerm(Imm);
8920 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8921 Ok = parseSwizzleBroadcast(Imm);
8922 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8923 Ok = parseSwizzleSwap(Imm);
8924 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8925 Ok = parseSwizzleReverse(Imm);
8926 } else if (trySkipId(IdSymbolic[ID_FFT])) {
8927 Ok = parseSwizzleFFT(Imm);
8928 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8929 Ok = parseSwizzleRotate(Imm);
8930 } else {
8931 Error(ModeLoc, "expected a swizzle mode");
8932 }
8933
8934 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
8935 }
8936
8937 return false;
8938}
8939
8940ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8941 SMLoc S = getLoc();
8942 int64_t Imm = 0;
8943
8944 if (trySkipId("offset")) {
8945
8946 bool Ok = false;
8947 if (skipToken(AsmToken::Colon, "expected a colon")) {
8948 if (trySkipId("swizzle")) {
8949 Ok = parseSwizzleMacro(Imm);
8950 } else {
8951 Ok = parseSwizzleOffset(Imm);
8952 }
8953 }
8954
8955 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8956
8957 return Ok ? ParseStatus::Success : ParseStatus::Failure;
8958 }
8959 return ParseStatus::NoMatch;
8960}
8961
8962bool
8963AMDGPUOperand::isSwizzle() const {
8964 return isImmTy(ImmTySwizzle);
8965}
8966
8967//===----------------------------------------------------------------------===//
8968// VGPR Index Mode
8969//===----------------------------------------------------------------------===//
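// Illustrative example of the syntax handled in this section:
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
// A plain 4-bit immediate is also accepted.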
8970
8971int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8972
8973 using namespace llvm::AMDGPU::VGPRIndexMode;
8974
8975 if (trySkipToken(AsmToken::RParen)) {
8976 return OFF;
8977 }
8978
8979 int64_t Imm = 0;
8980
8981 while (true) {
8982 unsigned Mode = 0;
8983 SMLoc S = getLoc();
8984
8985 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8986 if (trySkipId(IdSymbolic[ModeId])) {
8987 Mode = 1 << ModeId;
8988 break;
8989 }
8990 }
8991
8992 if (Mode == 0) {
8993 Error(S, (Imm == 0)?
8994 "expected a VGPR index mode or a closing parenthesis" :
8995 "expected a VGPR index mode");
8996 return UNDEF;
8997 }
8998
8999 if (Imm & Mode) {
9000 Error(S, "duplicate VGPR index mode");
9001 return UNDEF;
9002 }
9003 Imm |= Mode;
9004
9005 if (trySkipToken(AsmToken::RParen))
9006 break;
9007 if (!skipToken(AsmToken::Comma,
9008 "expected a comma or a closing parenthesis"))
9009 return UNDEF;
9010 }
9011
9012 return Imm;
9013}
9014
9015ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
9016
9017 using namespace llvm::AMDGPU::VGPRIndexMode;
9018
9019 int64_t Imm = 0;
9020 SMLoc S = getLoc();
9021
9022 if (trySkipId("gpr_idx", AsmToken::LParen)) {
9023 Imm = parseGPRIdxMacro();
9024 if (Imm == UNDEF)
9025 return ParseStatus::Failure;
9026 } else {
9027 if (getParser().parseAbsoluteExpression(Imm))
9028 return ParseStatus::Failure;
9029 if (Imm < 0 || !isUInt<4>(Imm))
9030 return Error(S, "invalid immediate: only 4-bit values are legal");
9031 }
9032
9033 Operands.push_back(
9034 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
9035 return ParseStatus::Success;
9036}
9037
9038bool AMDGPUOperand::isGPRIdxMode() const {
9039 return isImmTy(ImmTyGprIdxMode);
9040}
9041
9042//===----------------------------------------------------------------------===//
9043// sopp branch targets
9044//===----------------------------------------------------------------------===//
9045
9046ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
9047
9048 // Make sure we are not parsing something
9049 // that looks like a label or an expression but is not.
9050 // This will improve error messages.
9051 if (isRegister() || isModifier())
9052 return ParseStatus::NoMatch;
9053
9054 if (!parseExpr(Operands))
9055 return ParseStatus::Failure;
9056
9057 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
9058 assert(Opr.isImm() || Opr.isExpr());
9059 SMLoc Loc = Opr.getStartLoc();
9060
9061 // Currently we do not support arbitrary expressions as branch targets.
9062 // Only labels and absolute expressions are accepted.
9063 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9064 Error(Loc, "expected an absolute expression or a label");
9065 } else if (Opr.isImm() && !Opr.isS16Imm()) {
9066 Error(Loc, "expected a 16-bit signed jump offset");
9067 }
9068
9069 return ParseStatus::Success;
9070}
9071
9072//===----------------------------------------------------------------------===//
9073// Boolean holding registers
9074//===----------------------------------------------------------------------===//
9075
9076ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
9077 return parseReg(Operands);
9078}
9079
9080//===----------------------------------------------------------------------===//
9081// mubuf
9082//===----------------------------------------------------------------------===//
9083
9084void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9085 const OperandVector &Operands,
9086 bool IsAtomic) {
9087 OptionalImmIndexMap OptionalIdx;
9088 unsigned FirstOperandIdx = 1;
9089 bool IsAtomicReturn = false;
9090
9091 if (IsAtomic) {
9092 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
 9093 SIInstrFlags::IsAtomicRet;
 9094 }
9095
9096 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9097 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9098
9099 // Add the register arguments
9100 if (Op.isReg()) {
9101 Op.addRegOperands(Inst, 1);
9102 // Insert a tied src for atomic return dst.
9103 // This cannot be postponed as subsequent calls to
9104 // addImmOperands rely on correct number of MC operands.
9105 if (IsAtomicReturn && i == FirstOperandIdx)
9106 Op.addRegOperands(Inst, 1);
9107 continue;
9108 }
9109
9110 // Handle the case where soffset is an immediate
9111 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9112 Op.addImmOperands(Inst, 1);
9113 continue;
9114 }
9115
9116 // Handle tokens like 'offen' which are sometimes hard-coded into the
9117 // asm string. There are no MCInst operands for these.
9118 if (Op.isToken()) {
9119 continue;
9120 }
9121 assert(Op.isImm());
9122
9123 // Handle optional arguments
9124 OptionalIdx[Op.getImmTy()] = i;
9125 }
9126
9127 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9128 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9129}
9130
9131//===----------------------------------------------------------------------===//
9132// smrd
9133//===----------------------------------------------------------------------===//
9134
9135bool AMDGPUOperand::isSMRDOffset8() const {
9136 return isImmLiteral() && isUInt<8>(getImm());
9137}
9138
9139bool AMDGPUOperand::isSMEMOffset() const {
9140 // Offset range is checked later by validator.
9141 return isImmLiteral();
9142}
9143
9144bool AMDGPUOperand::isSMRDLiteralOffset() const {
9145 // 32-bit literals are only supported on CI and we only want to use them
9146 // when the offset is > 8-bits.
9147 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9148}
9149
9150//===----------------------------------------------------------------------===//
9151// vop3
9152//===----------------------------------------------------------------------===//
9153
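// Output modifier (omod) hardware encoding: 0 = none, 1 = *2, 2 = *4, 3 = /2.
// ConvertOmodMul maps mul:1/2/4 to 0/1/2; ConvertOmodDiv maps div:1/2 to 0/3.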
9154static bool ConvertOmodMul(int64_t &Mul) {
9155 if (Mul != 1 && Mul != 2 && Mul != 4)
9156 return false;
9157
9158 Mul >>= 1;
9159 return true;
9160}
9161
9162static bool ConvertOmodDiv(int64_t &Div) {
9163 if (Div == 1) {
9164 Div = 0;
9165 return true;
9166 }
9167
9168 if (Div == 2) {
9169 Div = 3;
9170 return true;
9171 }
9172
9173 return false;
9174}
9175
9176// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9177// This is intentional and ensures compatibility with sp3.
9178// See bug 35397 for details.
9179bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9180 if (BoundCtrl == 0 || BoundCtrl == 1) {
9181 if (!isGFX11Plus())
9182 BoundCtrl = 1;
9183 return true;
9184 }
9185 return false;
9186}
9187
9188void AMDGPUAsmParser::onBeginOfFile() {
9189 if (!getParser().getStreamer().getTargetStreamer() ||
9190 getSTI().getTargetTriple().getArch() == Triple::r600)
9191 return;
9192
9193 if (!getTargetStreamer().getTargetID())
9194 getTargetStreamer().initializeTargetID(getSTI(),
9195 getSTI().getFeatureString());
9196
9197 if (isHsaAbi(getSTI()))
9198 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9199}
9200
9201/// Parse AMDGPU specific expressions.
9202///
9203/// expr ::= or(expr, ...) |
9204/// max(expr, ...)
9205///
9206bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9207 using AGVK = AMDGPUMCExpr::VariantKind;
9208
9209 if (isToken(AsmToken::Identifier)) {
9210 StringRef TokenId = getTokenStr();
9211 AGVK VK = StringSwitch<AGVK>(TokenId)
9212 .Case("max", AGVK::AGVK_Max)
9213 .Case("or", AGVK::AGVK_Or)
9214 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9215 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9216 .Case("alignto", AGVK::AGVK_AlignTo)
9217 .Case("occupancy", AGVK::AGVK_Occupancy)
9218 .Default(AGVK::AGVK_None);
9219
9220 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
 9221 SmallVector<const MCExpr *, 4> Exprs;
 9222 uint64_t CommaCount = 0;
9223 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9224 lex(); // Eat '('
9225 while (true) {
9226 if (trySkipToken(AsmToken::RParen)) {
9227 if (Exprs.empty()) {
9228 Error(getToken().getLoc(),
9229 "empty " + Twine(TokenId) + " expression");
9230 return true;
9231 }
9232 if (CommaCount + 1 != Exprs.size()) {
9233 Error(getToken().getLoc(),
9234 "mismatch of commas in " + Twine(TokenId) + " expression");
9235 return true;
9236 }
9237 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9238 return false;
9239 }
9240 const MCExpr *Expr;
9241 if (getParser().parseExpression(Expr, EndLoc))
9242 return true;
9243 Exprs.push_back(Expr);
9244 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9245 if (LastTokenWasComma)
9246 CommaCount++;
9247 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9248 Error(getToken().getLoc(),
9249 "unexpected token in " + Twine(TokenId) + " expression");
9250 return true;
9251 }
9252 }
9253 }
9254 }
9255 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9256}
9257
9258ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9259 StringRef Name = getTokenStr();
9260 if (Name == "mul") {
9261 return parseIntWithPrefix("mul", Operands,
9262 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9263 }
9264
9265 if (Name == "div") {
9266 return parseIntWithPrefix("div", Operands,
9267 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9268 }
9269
9270 return ParseStatus::NoMatch;
9271}
9272
9273// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9274// the number of src operands present, then copies that bit into src0_modifiers.
9275static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9276 int Opc = Inst.getOpcode();
9277 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9278 if (OpSelIdx == -1)
9279 return;
9280
9281 int SrcNum;
9282 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9283 AMDGPU::OpName::src2};
9284 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9285 ++SrcNum)
9286 ;
9287 assert(SrcNum > 0);
9288
9289 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9290
9291 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9292 if (DstIdx == -1)
9293 return;
9294
9295 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9296 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9297 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9298 if (DstOp.isReg() &&
9299 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
 9300 if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
 9301 ModVal |= SISrcMods::DST_OP_SEL;
9302 } else {
9303 if ((OpSel & (1 << SrcNum)) != 0)
9304 ModVal |= SISrcMods::DST_OP_SEL;
9305 }
9306 Inst.getOperand(ModIdx).setImm(ModVal);
9307}
9308
9309void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9310 const OperandVector &Operands) {
9311 cvtVOP3P(Inst, Operands);
9312 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9313}
9314
9315void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9316 OptionalImmIndexMap &OptionalIdx) {
9317 cvtVOP3P(Inst, Operands, OptionalIdx);
9318 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9319}
9320
9321static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9322 return
9323 // 1. This operand is input modifiers
9324 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9325 // 2. This is not last operand
9326 && Desc.NumOperands > (OpNum + 1)
9327 // 3. Next operand is register class
9328 && Desc.operands()[OpNum + 1].RegClass != -1
9329 // 4. Next register is not tied to any other operand
9330 && Desc.getOperandConstraint(OpNum + 1,
 9331 MCOI::TIED_TO) == -1;
 9332}
9333
9334void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9335{
9336 OptionalImmIndexMap OptionalIdx;
9337 unsigned Opc = Inst.getOpcode();
9338
9339 unsigned I = 1;
9340 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9341 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9342 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9343 }
9344
9345 for (unsigned E = Operands.size(); I != E; ++I) {
9346 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
 9347 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
 9348 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9349 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9350 Op.isInterpAttrChan()) {
9351 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9352 } else if (Op.isImmModifier()) {
9353 OptionalIdx[Op.getImmTy()] = I;
9354 } else {
9355 llvm_unreachable("unhandled operand type");
9356 }
9357 }
9358
9359 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9360 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9361 AMDGPUOperand::ImmTyHigh);
9362
9363 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9364 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9365 AMDGPUOperand::ImmTyClamp);
9366
9367 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9368 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9369 AMDGPUOperand::ImmTyOModSI);
9370}
9371
9372void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9373{
9374 OptionalImmIndexMap OptionalIdx;
9375 unsigned Opc = Inst.getOpcode();
9376
9377 unsigned I = 1;
9378 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9379 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9380 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9381 }
9382
9383 for (unsigned E = Operands.size(); I != E; ++I) {
9384 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
 9385 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
 9386 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9387 } else if (Op.isImmModifier()) {
9388 OptionalIdx[Op.getImmTy()] = I;
9389 } else {
9390 llvm_unreachable("unhandled operand type");
9391 }
9392 }
9393
9394 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9395
9396 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9397 if (OpSelIdx != -1)
9398 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9399
9400 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9401
9402 if (OpSelIdx == -1)
9403 return;
9404
9405 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9406 AMDGPU::OpName::src2};
9407 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9408 AMDGPU::OpName::src1_modifiers,
9409 AMDGPU::OpName::src2_modifiers};
9410
9411 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9412
9413 for (int J = 0; J < 3; ++J) {
9414 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9415 if (OpIdx == -1)
9416 break;
9417
9418 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9419 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9420
9421 if ((OpSel & (1 << J)) != 0)
9422 ModVal |= SISrcMods::OP_SEL_0;
9423 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
9424 (OpSel & (1 << 3)) != 0)
9425 ModVal |= SISrcMods::DST_OP_SEL;
9426
9427 Inst.getOperand(ModIdx).setImm(ModVal);
9428 }
9429}
9430void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9431 const OperandVector &Operands) {
9432 OptionalImmIndexMap OptionalIdx;
9433 unsigned Opc = Inst.getOpcode();
9434 unsigned I = 1;
9435 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9436
9437 const MCInstrDesc &Desc = MII.get(Opc);
9438
9439 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9440 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9441
9442 for (unsigned E = Operands.size(); I != E; ++I) {
9443 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9444 int NumOperands = Inst.getNumOperands();
9445 // The order of operands in MCInst and parsed operands are different.
9446 // Adding dummy cbsz and blgp operands at corresponding MCInst operand
9447 // indices for parsing scale values correctly.
9448 if (NumOperands == CbszOpIdx) {
 9449 Inst.addOperand(MCOperand::createImm(0));
 9450 Inst.addOperand(MCOperand::createImm(0));
 9451 }
9452 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9453 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9454 } else if (Op.isImmModifier()) {
9455 OptionalIdx[Op.getImmTy()] = I;
9456 } else {
9457 Op.addRegOrImmOperands(Inst, 1);
9458 }
9459 }
9460
9461 // Insert CBSZ and BLGP operands for F8F6F4 variants
9462 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9463 if (CbszIdx != OptionalIdx.end()) {
9464 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9465 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9466 }
9467
9468 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9469 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9470 if (BlgpIdx != OptionalIdx.end()) {
9471 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9472 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9473 }
9474
9475 // Add dummy src_modifiers
 9476 Inst.addOperand(MCOperand::createImm(0));
 9477 Inst.addOperand(MCOperand::createImm(0));
 9478
9479 // Handle op_sel fields
9480
9481 unsigned OpSel = 0;
9482 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9483 if (OpselIdx != OptionalIdx.end()) {
9484 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9485 .getImm();
9486 }
9487
9488 unsigned OpSelHi = 0;
9489 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9490 if (OpselHiIdx != OptionalIdx.end()) {
9491 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9492 .getImm();
9493 }
9494 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9495 AMDGPU::OpName::src1_modifiers};
9496
9497 for (unsigned J = 0; J < 2; ++J) {
9498 unsigned ModVal = 0;
9499 if (OpSel & (1 << J))
9500 ModVal |= SISrcMods::OP_SEL_0;
9501 if (OpSelHi & (1 << J))
9502 ModVal |= SISrcMods::OP_SEL_1;
9503
9504 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9505 Inst.getOperand(ModIdx).setImm(ModVal);
9506 }
9507}
9508
9509void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9510 OptionalImmIndexMap &OptionalIdx) {
9511 unsigned Opc = Inst.getOpcode();
9512
9513 unsigned I = 1;
9514 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9515 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9516 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9517 }
9518
9519 for (unsigned E = Operands.size(); I != E; ++I) {
9520 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
 9521 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
 9522 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9523 } else if (Op.isImmModifier()) {
9524 OptionalIdx[Op.getImmTy()] = I;
9525 } else {
9526 Op.addRegOrImmOperands(Inst, 1);
9527 }
9528 }
9529
9530 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9531 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9532 AMDGPUOperand::ImmTyScaleSel);
9533
9534 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9535 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9536 AMDGPUOperand::ImmTyClamp);
9537
9538 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9539 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9540 Inst.addOperand(Inst.getOperand(0));
9541 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9542 AMDGPUOperand::ImmTyByteSel);
9543 }
9544
9545 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9546 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9547 AMDGPUOperand::ImmTyOModSI);
9548
9549 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9550 // it has src2 register operand that is tied to dst operand
9551 // we don't allow modifiers for this operand in assembler so src2_modifiers
9552 // should be 0.
9553 if (isMAC(Opc)) {
9554 auto *it = Inst.begin();
9555 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9556 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9557 ++it;
9558 // Copy the operand to ensure it's not invalidated when Inst grows.
9559 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9560 }
9561}
9562
9563void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9564 OptionalImmIndexMap OptionalIdx;
9565 cvtVOP3(Inst, Operands, OptionalIdx);
9566}
9567
9568void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9569 OptionalImmIndexMap &OptIdx) {
9570 const int Opc = Inst.getOpcode();
9571 const MCInstrDesc &Desc = MII.get(Opc);
9572
9573 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9574
9575 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9576 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9577 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9578 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9579 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9580 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9581 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9582 Inst.addOperand(Inst.getOperand(0));
9583 }
9584
9585 // Adding vdst_in operand is already covered for these DPP instructions in
9586 // cvtVOP3DPP.
9587 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
9588 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
9589 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
9590 Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
9591 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
9592 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
9593 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
9594 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
9595 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
9596 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
9597 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9598 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
9599 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
9600 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9601 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9602 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
9603 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
9604 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
9605 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
9606 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
9607 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
9608 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
9609 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
9610 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
9611 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
9612 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
9613 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
9614 Inst.addOperand(Inst.getOperand(0));
9615 }
9616
9617 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9618 if (BitOp3Idx != -1) {
9619 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9620 }
9621
9622 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9623 // instruction, and then figure out where to actually put the modifiers
9624
9625 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9626 if (OpSelIdx != -1) {
9627 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9628 }
9629
9630 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9631 if (OpSelHiIdx != -1) {
9632 int DefaultVal = IsPacked ? -1 : 0;
9633 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9634 DefaultVal);
9635 }
9636
9637 int MatrixAFMTIdx =
9638 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9639 if (MatrixAFMTIdx != -1) {
9640 addOptionalImmOperand(Inst, Operands, OptIdx,
9641 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9642 }
9643
9644 int MatrixBFMTIdx =
9645 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9646 if (MatrixBFMTIdx != -1) {
9647 addOptionalImmOperand(Inst, Operands, OptIdx,
9648 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9649 }
9650
9651 int MatrixAScaleIdx =
9652 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9653 if (MatrixAScaleIdx != -1) {
9654 addOptionalImmOperand(Inst, Operands, OptIdx,
9655 AMDGPUOperand::ImmTyMatrixAScale, 0);
9656 }
9657
9658 int MatrixBScaleIdx =
9659 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9660 if (MatrixBScaleIdx != -1) {
9661 addOptionalImmOperand(Inst, Operands, OptIdx,
9662 AMDGPUOperand::ImmTyMatrixBScale, 0);
9663 }
9664
9665 int MatrixAScaleFmtIdx =
9666 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9667 if (MatrixAScaleFmtIdx != -1) {
9668 addOptionalImmOperand(Inst, Operands, OptIdx,
9669 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9670 }
9671
9672 int MatrixBScaleFmtIdx =
9673 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9674 if (MatrixBScaleFmtIdx != -1) {
9675 addOptionalImmOperand(Inst, Operands, OptIdx,
9676 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9677 }
9678
9679 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9680 addOptionalImmOperand(Inst, Operands, OptIdx,
9681 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9682
9683 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9684 addOptionalImmOperand(Inst, Operands, OptIdx,
9685 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9686
9687 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9688 if (NegLoIdx != -1)
9689 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9690
9691 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9692 if (NegHiIdx != -1)
9693 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9694
9695 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9696 AMDGPU::OpName::src2};
9697 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9698 AMDGPU::OpName::src1_modifiers,
9699 AMDGPU::OpName::src2_modifiers};
9700
9701 unsigned OpSel = 0;
9702 unsigned OpSelHi = 0;
9703 unsigned NegLo = 0;
9704 unsigned NegHi = 0;
9705
9706 if (OpSelIdx != -1)
9707 OpSel = Inst.getOperand(OpSelIdx).getImm();
9708
9709 if (OpSelHiIdx != -1)
9710 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9711
9712 if (NegLoIdx != -1)
9713 NegLo = Inst.getOperand(NegLoIdx).getImm();
9714
9715 if (NegHiIdx != -1)
9716 NegHi = Inst.getOperand(NegHiIdx).getImm();
9717
9718 for (int J = 0; J < 3; ++J) {
9719 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9720 if (OpIdx == -1)
9721 break;
9722
9723 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9724
9725 if (ModIdx == -1)
9726 continue;
9727
9728 uint32_t ModVal = 0;
9729
9730 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9731 if (SrcOp.isReg() && getMRI()
9732 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9733 .contains(SrcOp.getReg())) {
9734 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9735 if (VGPRSuffixIsHi)
9736 ModVal |= SISrcMods::OP_SEL_0;
9737 } else {
9738 if ((OpSel & (1 << J)) != 0)
9739 ModVal |= SISrcMods::OP_SEL_0;
9740 }
9741
9742 if ((OpSelHi & (1 << J)) != 0)
9743 ModVal |= SISrcMods::OP_SEL_1;
9744
9745 if ((NegLo & (1 << J)) != 0)
9746 ModVal |= SISrcMods::NEG;
9747
9748 if ((NegHi & (1 << J)) != 0)
9749 ModVal |= SISrcMods::NEG_HI;
9750
9751 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9752 }
9753}
9754
9755void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9756 OptionalImmIndexMap OptIdx;
9757 cvtVOP3(Inst, Operands, OptIdx);
9758 cvtVOP3P(Inst, Operands, OptIdx);
9759}
9760
 9761static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
 9762 unsigned i, unsigned Opc,
9763 AMDGPU::OpName OpName) {
9764 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9765 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9766 else
9767 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9768}
9769
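// SWMMAC MCInst operand order: vdst, src0 (with modifiers where present),
// src1 (with modifiers where present), a copy of vdst as the tied input,
// src2, then the optional modifiers gathered below.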
9770void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9771 unsigned Opc = Inst.getOpcode();
9772
9773 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9774 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9775 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9776 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9777 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9778
9779 OptionalImmIndexMap OptIdx;
9780 for (unsigned i = 5; i < Operands.size(); ++i) {
9781 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9782 OptIdx[Op.getImmTy()] = i;
9783 }
9784
9785 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9786 addOptionalImmOperand(Inst, Operands, OptIdx,
9787 AMDGPUOperand::ImmTyIndexKey8bit);
9788
9789 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9790 addOptionalImmOperand(Inst, Operands, OptIdx,
9791 AMDGPUOperand::ImmTyIndexKey16bit);
9792
9793 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9794 addOptionalImmOperand(Inst, Operands, OptIdx,
9795 AMDGPUOperand::ImmTyIndexKey32bit);
9796
9797 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9798 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9799
9800 cvtVOP3P(Inst, Operands, OptIdx);
9801}
9802
9803//===----------------------------------------------------------------------===//
9804// VOPD
9805//===----------------------------------------------------------------------===//
9806
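// A VOPD instruction is written as two components separated by "::", for
// example "v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4". This parser
// consumes the "::" token and the VOPDY mnemonic that follows it.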
9807ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9808 if (!hasVOPD(getSTI()))
9809 return ParseStatus::NoMatch;
9810
9811 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9812 SMLoc S = getLoc();
9813 lex();
9814 lex();
9815 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9816 SMLoc OpYLoc = getLoc();
9817 StringRef OpYName;
9818 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9819 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9820 return ParseStatus::Success;
9821 }
9822 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9823 }
9824 return ParseStatus::NoMatch;
9825}
9826
9827// Create VOPD MCInst operands using parsed assembler operands.
9828void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9829 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9830
9831 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9832 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
 9833 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
 9834 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9835 return;
9836 }
9837 if (Op.isReg()) {
9838 Op.addRegOperands(Inst, 1);
9839 return;
9840 }
9841 if (Op.isImm()) {
9842 Op.addImmOperands(Inst, 1);
9843 return;
9844 }
9845 llvm_unreachable("Unhandled operand type in cvtVOPD");
9846 };
9847
9848 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9849
9850 // MCInst operands are ordered as follows:
9851 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9852
9853 for (auto CompIdx : VOPD::COMPONENTS) {
9854 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9855 }
9856
9857 for (auto CompIdx : VOPD::COMPONENTS) {
9858 const auto &CInfo = InstInfo[CompIdx];
9859 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9860 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9861 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9862 if (CInfo.hasSrc2Acc())
9863 addOp(CInfo.getIndexOfDstInParsedOperands());
9864 }
9865
9866 int BitOp3Idx =
9867 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9868 if (BitOp3Idx != -1) {
9869 OptionalImmIndexMap OptIdx;
9870 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9871 if (Op.isImm())
9872 OptIdx[Op.getImmTy()] = Operands.size() - 1;
9873
9874 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9875 }
9876}
9877
9878//===----------------------------------------------------------------------===//
9879// dpp
9880//===----------------------------------------------------------------------===//
9881
9882bool AMDGPUOperand::isDPP8() const {
9883 return isImmTy(ImmTyDPP8);
9884}
9885
9886bool AMDGPUOperand::isDPPCtrl() const {
9887 using namespace AMDGPU::DPP;
9888
9889 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9890 if (result) {
9891 int64_t Imm = getImm();
9892 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9893 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9894 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9895 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9896 (Imm == DppCtrl::WAVE_SHL1) ||
9897 (Imm == DppCtrl::WAVE_ROL1) ||
9898 (Imm == DppCtrl::WAVE_SHR1) ||
9899 (Imm == DppCtrl::WAVE_ROR1) ||
9900 (Imm == DppCtrl::ROW_MIRROR) ||
9901 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9902 (Imm == DppCtrl::BCAST15) ||
9903 (Imm == DppCtrl::BCAST31) ||
9904 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9905 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9906 }
9907 return false;
9908}
9909
9910//===----------------------------------------------------------------------===//
9911// mAI
9912//===----------------------------------------------------------------------===//
9913
9914bool AMDGPUOperand::isBLGP() const {
9915 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9916}
9917
9918bool AMDGPUOperand::isS16Imm() const {
9919 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9920}
9921
9922bool AMDGPUOperand::isU16Imm() const {
9923 return isImmLiteral() && isUInt<16>(getImm());
9924}
9925
9926//===----------------------------------------------------------------------===//
9927// dim
9928//===----------------------------------------------------------------------===//
9929
9930bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9931 // We want to allow "dim:1D" etc.,
9932 // but the initial 1 is tokenized as an integer.
9933 std::string Token;
9934 if (isToken(AsmToken::Integer)) {
9935 SMLoc Loc = getToken().getEndLoc();
9936 Token = std::string(getTokenStr());
9937 lex();
9938 if (getLoc() != Loc)
9939 return false;
9940 }
9941
9942 StringRef Suffix;
9943 if (!parseId(Suffix))
9944 return false;
9945 Token += Suffix;
9946
9947 StringRef DimId = Token;
9948 DimId.consume_front("SQ_RSRC_IMG_");
9949
9950 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9951 if (!DimInfo)
9952 return false;
9953
9954 Encoding = DimInfo->Encoding;
9955 return true;
9956}
9957
9958ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9959 if (!isGFX10Plus())
9960 return ParseStatus::NoMatch;
9961
9962 SMLoc S = getLoc();
9963
9964 if (!trySkipId("dim", AsmToken::Colon))
9965 return ParseStatus::NoMatch;
9966
9967 unsigned Encoding;
9968 SMLoc Loc = getLoc();
9969 if (!parseDimId(Encoding))
9970 return Error(Loc, "invalid dim value");
9971
9972 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9973 AMDGPUOperand::ImmTyDim));
9974 return ParseStatus::Success;
9975}
9976
9977//===----------------------------------------------------------------------===//
9978// dpp
9979//===----------------------------------------------------------------------===//
9980
9981ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9982 SMLoc S = getLoc();
9983
9984 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9985 return ParseStatus::NoMatch;
9986
9987 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9988
9989 int64_t Sels[8];
9990
9991 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9992 return ParseStatus::Failure;
9993
9994 for (size_t i = 0; i < 8; ++i) {
9995 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9996 return ParseStatus::Failure;
9997
9998 SMLoc Loc = getLoc();
9999 if (getParser().parseAbsoluteExpression(Sels[i]))
10000 return ParseStatus::Failure;
10001 if (0 > Sels[i] || 7 < Sels[i])
10002 return Error(Loc, "expected a 3-bit value");
10003 }
10004
10005 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10006 return ParseStatus::Failure;
10007
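// Pack the eight 3-bit lane selectors into a single immediate, lane 0 in the
// lowest bits.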
10008 unsigned DPP8 = 0;
10009 for (size_t i = 0; i < 8; ++i)
10010 DPP8 |= (Sels[i] << (i * 3));
10011
10012 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
10013 return ParseStatus::Success;
10014}
10015
10016bool
10017AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10018 const OperandVector &Operands) {
10019 if (Ctrl == "row_newbcast")
10020 return isGFX90A();
10021
10022 if (Ctrl == "row_share" ||
10023 Ctrl == "row_xmask")
10024 return isGFX10Plus();
10025
10026 if (Ctrl == "wave_shl" ||
10027 Ctrl == "wave_shr" ||
10028 Ctrl == "wave_rol" ||
10029 Ctrl == "wave_ror" ||
10030 Ctrl == "row_bcast")
10031 return isVI() || isGFX9();
10032
10033 return Ctrl == "row_mirror" ||
10034 Ctrl == "row_half_mirror" ||
10035 Ctrl == "quad_perm" ||
10036 Ctrl == "row_shl" ||
10037 Ctrl == "row_shr" ||
10038 Ctrl == "row_ror";
10039}
10040
10041int64_t
10042AMDGPUAsmParser::parseDPPCtrlPerm() {
10043 // quad_perm:[%d,%d,%d,%d]
10044
10045 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10046 return -1;
10047
10048 int64_t Val = 0;
10049 for (int i = 0; i < 4; ++i) {
10050 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10051 return -1;
10052
10053 int64_t Temp;
10054 SMLoc Loc = getLoc();
10055 if (getParser().parseAbsoluteExpression(Temp))
10056 return -1;
10057 if (Temp < 0 || Temp > 3) {
10058 Error(Loc, "expected a 2-bit value");
10059 return -1;
10060 }
10061
10062 Val += (Temp << i * 2);
10063 }
10064
10065 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10066 return -1;
10067
10068 return Val;
10069}
10070
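// For controls that take a value (e.g. row_shl:1), Check.Ctrl below is the
// base encoding and [Lo, Hi] the accepted value range; when Lo == Hi the
// value merely selects that single fixed encoding (the wave_* controls).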
10071int64_t
10072AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10073 using namespace AMDGPU::DPP;
10074
10075 // sel:%d
10076
10077 int64_t Val;
10078 SMLoc Loc = getLoc();
10079
10080 if (getParser().parseAbsoluteExpression(Val))
10081 return -1;
10082
10083 struct DppCtrlCheck {
10084 int64_t Ctrl;
10085 int Lo;
10086 int Hi;
10087 };
10088
10089 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10090 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10091 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10092 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10093 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10094 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10095 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10096 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10097 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10098 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10099 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10100 .Default({-1, 0, 0});
10101
10102 bool Valid;
10103 if (Check.Ctrl == -1) {
10104 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10105 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10106 } else {
10107 Valid = Check.Lo <= Val && Val <= Check.Hi;
10108 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10109 }
10110
10111 if (!Valid) {
10112 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10113 return -1;
10114 }
10115
10116 return Val;
10117}
10118
10119ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10120 using namespace AMDGPU::DPP;
10121
10122 if (!isToken(AsmToken::Identifier) ||
10123 !isSupportedDPPCtrl(getTokenStr(), Operands))
10124 return ParseStatus::NoMatch;
10125
10126 SMLoc S = getLoc();
10127 int64_t Val = -1;
10128 StringRef Ctrl;
10129
10130 parseId(Ctrl);
10131
10132 if (Ctrl == "row_mirror") {
10133 Val = DppCtrl::ROW_MIRROR;
10134 } else if (Ctrl == "row_half_mirror") {
10135 Val = DppCtrl::ROW_HALF_MIRROR;
10136 } else {
10137 if (skipToken(AsmToken::Colon, "expected a colon")) {
10138 if (Ctrl == "quad_perm") {
10139 Val = parseDPPCtrlPerm();
10140 } else {
10141 Val = parseDPPCtrlSel(Ctrl);
10142 }
10143 }
10144 }
10145
10146 if (Val == -1)
10147 return ParseStatus::Failure;
10148
10149 Operands.push_back(
10150 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10151 return ParseStatus::Success;
10152}
10153
10154void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10155 bool IsDPP8) {
10156 OptionalImmIndexMap OptionalIdx;
10157 unsigned Opc = Inst.getOpcode();
10158 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10159
 10160 // MAC instructions are special because they have an 'old'
 10161 // operand which is not tied to dst (but is assumed to be).
 10162 // They also have a dummy, unused src2_modifiers operand.
10163 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10164 int Src2ModIdx =
10165 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10166 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10167 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10168
10169 unsigned I = 1;
10170 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10171 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10172 }
10173
10174 int Fi = 0;
10175 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10176 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10177 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10178 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10179 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
10180
10181 for (unsigned E = Operands.size(); I != E; ++I) {
10182
10183 if (IsMAC) {
10184 int NumOperands = Inst.getNumOperands();
10185 if (OldIdx == NumOperands) {
10186 // Handle old operand
10187 constexpr int DST_IDX = 0;
10188 Inst.addOperand(Inst.getOperand(DST_IDX));
10189 } else if (Src2ModIdx == NumOperands) {
10190 // Add unused dummy src2_modifiers
 10191 Inst.addOperand(MCOperand::createImm(0));
 10192 }
10193 }
10194
10195 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10196 Inst.addOperand(Inst.getOperand(0));
10197 }
10198
10199 if (IsVOP3CvtSrDpp) {
10200 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
 10201 Inst.addOperand(MCOperand::createImm(0));
 10202 Inst.addOperand(MCOperand::createReg(MCRegister()));
10203 }
10204 }
10205
10206 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
 10207 MCOI::TIED_TO);
 10208 if (TiedTo != -1) {
10209 assert((unsigned)TiedTo < Inst.getNumOperands());
10210 // handle tied old or src2 for MAC instructions
10211 Inst.addOperand(Inst.getOperand(TiedTo));
10212 }
10213 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10214 // Add the register arguments
10215 if (IsDPP8 && Op.isDppFI()) {
10216 Fi = Op.getImm();
10217 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10218 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10219 } else if (Op.isReg()) {
10220 Op.addRegOperands(Inst, 1);
10221 } else if (Op.isImm() &&
10222 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10223 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
10224 Op.addImmOperands(Inst, 1);
10225 } else if (Op.isImm()) {
10226 OptionalIdx[Op.getImmTy()] = I;
10227 } else {
10228 llvm_unreachable("unhandled operand type");
10229 }
10230 }
10231
10232 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10233 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10234 AMDGPUOperand::ImmTyClamp);
10235
10236 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10237 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10238 Inst.addOperand(Inst.getOperand(0));
10239 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10240 AMDGPUOperand::ImmTyByteSel);
10241 }
10242
10243 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10244 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10245
10246 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10247 cvtVOP3P(Inst, Operands, OptionalIdx);
10248 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10249 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10250 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10251 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10252 }
10253
10254 if (IsDPP8) {
10255 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10256 using namespace llvm::AMDGPU::DPP;
10257 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10258 } else {
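// 0xe4 encodes quad_perm:[0,1,2,3], i.e. the identity DPP control.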
10259 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10260 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10261 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10262 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10263
10264 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10265 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10266 AMDGPUOperand::ImmTyDppFI);
10267 }
10268}
10269
10270void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10271 OptionalImmIndexMap OptionalIdx;
10272
10273 unsigned I = 1;
10274 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10275 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10276 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10277 }
10278
10279 int Fi = 0;
10280 for (unsigned E = Operands.size(); I != E; ++I) {
10281 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
 10282 MCOI::TIED_TO);
 10283 if (TiedTo != -1) {
10284 assert((unsigned)TiedTo < Inst.getNumOperands());
10285 // handle tied old or src2 for MAC instructions
10286 Inst.addOperand(Inst.getOperand(TiedTo));
10287 }
10288 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10289 // Add the register arguments
10290 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10291 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10292 // Skip it.
10293 continue;
10294 }
10295
10296 if (IsDPP8) {
10297 if (Op.isDPP8()) {
10298 Op.addImmOperands(Inst, 1);
10299 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10300 Op.addRegWithFPInputModsOperands(Inst, 2);
10301 } else if (Op.isDppFI()) {
10302 Fi = Op.getImm();
10303 } else if (Op.isReg()) {
10304 Op.addRegOperands(Inst, 1);
10305 } else {
10306 llvm_unreachable("Invalid operand type");
10307 }
10308 } else {
 10309 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
 10310 Op.addRegWithFPInputModsOperands(Inst, 2);
10311 } else if (Op.isReg()) {
10312 Op.addRegOperands(Inst, 1);
10313 } else if (Op.isDPPCtrl()) {
10314 Op.addImmOperands(Inst, 1);
10315 } else if (Op.isImm()) {
10316 // Handle optional arguments
10317 OptionalIdx[Op.getImmTy()] = I;
10318 } else {
10319 llvm_unreachable("Invalid operand type");
10320 }
10321 }
10322 }
10323
10324 if (IsDPP8) {
10325 using namespace llvm::AMDGPU::DPP;
10326 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10327 } else {
10328 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10329 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10330 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10331 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10332 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10333 AMDGPUOperand::ImmTyDppFI);
10334 }
10335 }
10336}
10337
10338//===----------------------------------------------------------------------===//
10339// sdwa
10340//===----------------------------------------------------------------------===//
10341
10342ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10343 StringRef Prefix,
10344 AMDGPUOperand::ImmTy Type) {
10345 return parseStringOrIntWithPrefix(
10346 Operands, Prefix,
10347 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10348 Type);
10349}
10350
10351ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10352 return parseStringOrIntWithPrefix(
10353 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10354 AMDGPUOperand::ImmTySDWADstUnused);
10355}
10356
10357void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10358 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
10359}
10360
10361void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10362 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
10363}
10364
10365void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10366 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
10367}
10368
10369void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10370 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
10371}
10372
10373void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10374 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
10375}
10376
10377void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10378 uint64_t BasicInstType,
10379 bool SkipDstVcc,
10380 bool SkipSrcVcc) {
10381 using namespace llvm::AMDGPU::SDWA;
10382
10383 OptionalImmIndexMap OptionalIdx;
10384 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10385 bool SkippedVcc = false;
10386
10387 unsigned I = 1;
10388 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10389 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10390 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10391 }
10392
10393 for (unsigned E = Operands.size(); I != E; ++I) {
10394 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10395 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10396 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10397 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10398 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10399 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10400 // Skip VCC only if we didn't skip it on previous iteration.
10401 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10402 if (BasicInstType == SIInstrFlags::VOP2 &&
10403 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10404 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10405 SkippedVcc = true;
10406 continue;
10407 }
10408 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10409 SkippedVcc = true;
10410 continue;
10411 }
10412 }
 10413 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
 10414 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10415 } else if (Op.isImm()) {
10416 // Handle optional arguments
10417 OptionalIdx[Op.getImmTy()] = I;
10418 } else {
10419 llvm_unreachable("Invalid operand type");
10420 }
10421 SkippedVcc = false;
10422 }
10423
10424 const unsigned Opc = Inst.getOpcode();
10425 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10426 Opc != AMDGPU::V_NOP_sdwa_vi) {
 10427 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
10428 switch (BasicInstType) {
10429 case SIInstrFlags::VOP1:
10430 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10431 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10432 AMDGPUOperand::ImmTyClamp, 0);
10433
10434 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10435 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10436 AMDGPUOperand::ImmTyOModSI, 0);
10437
10438 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10439 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10440 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10441
10442 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10443 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10444 AMDGPUOperand::ImmTySDWADstUnused,
10445 DstUnused::UNUSED_PRESERVE);
10446
10447 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10448 break;
10449
10450 case SIInstrFlags::VOP2:
10451 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10452 AMDGPUOperand::ImmTyClamp, 0);
10453
10454 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10455 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10456
10457 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10458 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10459 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10460 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10461 break;
10462
10463 case SIInstrFlags::VOPC:
10464 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10465 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10466 AMDGPUOperand::ImmTyClamp, 0);
10467 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10468 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10469 break;
10470
10471 default:
10472 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10473 }
10474 }
10475
10476 // special case v_mac_{f16, f32}:
10477 // it has src2 register operand that is tied to dst operand
10478 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10479 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10480 auto *it = Inst.begin();
10481 std::advance(
10482 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10483 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10484 }
10485}
10486
10487/// Force static initialization.
10488extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
 10489LLVMInitializeAMDGPUAsmParser() {
 10490 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
 10491 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
 10492}
 10493
10494#define GET_REGISTER_MATCHER
10495#define GET_MATCHER_IMPLEMENTATION
10496#define GET_MNEMONIC_SPELL_CHECKER
10497#define GET_MNEMONIC_CHECKER
10498#include "AMDGPUGenAsmMatcher.inc"
10499
10500ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10501 unsigned MCK) {
10502 switch (MCK) {
10503 case MCK_addr64:
10504 return parseTokenOp("addr64", Operands);
10505 case MCK_done:
10506 return parseTokenOp("done", Operands);
10507 case MCK_idxen:
10508 return parseTokenOp("idxen", Operands);
10509 case MCK_lds:
10510 return parseTokenOp("lds", Operands);
10511 case MCK_offen:
10512 return parseTokenOp("offen", Operands);
10513 case MCK_off:
10514 return parseTokenOp("off", Operands);
10515 case MCK_row_95_en:
10516 return parseTokenOp("row_en", Operands);
10517 case MCK_gds:
10518 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10519 case MCK_tfe:
10520 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10521 }
10522 return tryCustomParseOperand(Operands, MCK);
10523}
10524
10525// This function should be defined after auto-generated include so that we have
10526// MatchClassKind enum defined
10527unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10528 unsigned Kind) {
10529 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10530 // But MatchInstructionImpl() expects to meet token and fails to validate
10531 // operand. This method checks if we are given immediate operand but expect to
10532 // get corresponding token.
10533 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10534 switch (Kind) {
10535 case MCK_addr64:
10536 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10537 case MCK_gds:
10538 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10539 case MCK_lds:
10540 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10541 case MCK_idxen:
10542 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10543 case MCK_offen:
10544 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10545 case MCK_tfe:
10546 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10547 case MCK_SSrc_b32:
10548 // When operands have expression values, they will return true for isToken,
10549 // because it is not possible to distinguish between a token and an
10550 // expression at parse time. MatchInstructionImpl() will always try to
10551 // match an operand as a token, when isToken returns true, and when the
10552 // name of the expression is not a valid token, the match will fail,
10553 // so we need to handle it here.
10554 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10555 case MCK_SSrc_f32:
10556 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10557 case MCK_SOPPBrTarget:
10558 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10559 case MCK_VReg32OrOff:
10560 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10561 case MCK_InterpSlot:
10562 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10563 case MCK_InterpAttr:
10564 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10565 case MCK_InterpAttrChan:
10566 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10567 case MCK_SReg_64:
10568 case MCK_SReg_64_XEXEC:
10569 // Null is defined as a 32-bit register but
10570 // it should also be enabled with 64-bit operands or larger.
10571 // The following code enables it for SReg_64 and larger operands
10572 // used as source and destination. Remaining source
10573 // operands are handled in isInlinableImm.
10574 case MCK_SReg_96:
10575 case MCK_SReg_128:
10576 case MCK_SReg_256:
10577 case MCK_SReg_512:
10578 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10579 default:
10580 return Match_InvalidOperand;
10581 }
10582}
10583
10584//===----------------------------------------------------------------------===//
10585// endpgm
10586//===----------------------------------------------------------------------===//
10587
10588ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10589 SMLoc S = getLoc();
10590 int64_t Imm = 0;
10591
10592 if (!parseExpr(Imm)) {
10593 // The operand is optional, if not present default to 0
10594 Imm = 0;
10595 }
10596
10597 if (!isUInt<16>(Imm))
10598 return Error(S, "expected a 16-bit value");
10599
10600 Operands.push_back(
10601 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10602 return ParseStatus::Success;
10603}
10604
10605bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10606
10607//===----------------------------------------------------------------------===//
10608// Split Barrier
10609//===----------------------------------------------------------------------===//
10610
10611bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
#define P(N)
if(PassOpts->AAPipeline)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
unsigned unsigned DefaultVal
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file implements the SmallBitVector class.
static bool Enabled
Definition Statistic.cpp:46
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
BinaryOperator * Mul
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6057
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
Target independent representation for an assembler token.
Definition MCAsmMacro.h:22
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:64
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
constexpr bool isValid() const
Definition MCRegister.h:76
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:95
Represents a location in source code.
Definition SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:36
constexpr const char * getPointer() const
Definition SMLoc.h:34
constexpr bool isValid() const
Definition SMLoc.h:29
Represents a range in source code.
Definition SMLoc.h:48
SMLoc Start
Definition SMLoc.h:50
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:862
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:665
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:581
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:151
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:619
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:148
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:645
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:281
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:231
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:202
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:209
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:219
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:224
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:206
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:201
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:218
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:216
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:205
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:236
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:237
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:212
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:204
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:217
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:213
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:238
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:220
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:203
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:228
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1422
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:61
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:578
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1113
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:307
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:217
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:252
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:267
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:295
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:159
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:164
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:399
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:155
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1760
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
RegisterKind Kind
StringLiteral Name
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
static LLVM_ABI const fltSemantics & IEEEhalf() LLVM_READNONE
Definition APFloat.cpp:264
static LLVM_ABI const fltSemantics & BFloat() LLVM_READNONE
Definition APFloat.cpp:265
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...