AMDGPUAsmParser.cpp
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
56enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
62class AMDGPUOperand : public MCParsedAsmOperand {
63 enum KindTy {
64 Token,
65 Immediate,
 66 Register,
 67 Expression,
 68 } Kind;
69
70 SMLoc StartLoc, EndLoc;
71 const AMDGPUAsmParser *AsmParser;
72
73public:
74 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
75 : Kind(Kind_), AsmParser(AsmParser_) {}
76
77 using Ptr = std::unique_ptr<AMDGPUOperand>;
78
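 // Optional source-operand modifiers. Abs/Neg are the floating-point modifiers
 // and Sext the integer modifier; getModifiersOperand() asserts that the two
 // groups are never combined. Lit/Lit64 record that an explicit 32- or 64-bit
 // literal encoding was requested (see the HasLit/HasLit64 parameters of
 // parseImm below).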
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 bool Lit = false;
84 bool Lit64 = false;
85
86 bool hasFPModifiers() const { return Abs || Neg; }
87 bool hasIntModifiers() const { return Sext; }
88 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
89
90 int64_t getFPModifiersOperand() const {
91 int64_t Operand = 0;
92 Operand |= Abs ? SISrcMods::ABS : 0u;
93 Operand |= Neg ? SISrcMods::NEG : 0u;
94 return Operand;
95 }
96
97 int64_t getIntModifiersOperand() const {
98 int64_t Operand = 0;
99 Operand |= Sext ? SISrcMods::SEXT : 0u;
100 return Operand;
101 }
102
103 int64_t getModifiersOperand() const {
104 assert(!(hasFPModifiers() && hasIntModifiers())
105 && "fp and int modifiers should not be used simultaneously");
106 if (hasFPModifiers())
107 return getFPModifiersOperand();
108 if (hasIntModifiers())
109 return getIntModifiersOperand();
110 return 0;
111 }
112
113 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
114 };
115
116 enum ImmTy {
117 ImmTyNone,
118 ImmTyGDS,
119 ImmTyLDS,
120 ImmTyOffen,
121 ImmTyIdxen,
122 ImmTyAddr64,
123 ImmTyOffset,
124 ImmTyInstOffset,
125 ImmTyOffset0,
126 ImmTyOffset1,
127 ImmTySMEMOffsetMod,
128 ImmTyCPol,
129 ImmTyTFE,
130 ImmTyD16,
131 ImmTyClamp,
132 ImmTyOModSI,
133 ImmTySDWADstSel,
134 ImmTySDWASrc0Sel,
135 ImmTySDWASrc1Sel,
136 ImmTySDWADstUnused,
137 ImmTyDMask,
138 ImmTyDim,
139 ImmTyUNorm,
140 ImmTyDA,
141 ImmTyR128A16,
142 ImmTyA16,
143 ImmTyLWE,
144 ImmTyExpTgt,
145 ImmTyExpCompr,
146 ImmTyExpVM,
147 ImmTyFORMAT,
148 ImmTyHwreg,
149 ImmTyOff,
150 ImmTySendMsg,
151 ImmTyInterpSlot,
152 ImmTyInterpAttr,
153 ImmTyInterpAttrChan,
154 ImmTyOpSel,
155 ImmTyOpSelHi,
156 ImmTyNegLo,
157 ImmTyNegHi,
158 ImmTyIndexKey8bit,
159 ImmTyIndexKey16bit,
160 ImmTyIndexKey32bit,
161 ImmTyDPP8,
162 ImmTyDppCtrl,
163 ImmTyDppRowMask,
164 ImmTyDppBankMask,
165 ImmTyDppBoundCtrl,
166 ImmTyDppFI,
167 ImmTySwizzle,
168 ImmTyGprIdxMode,
169 ImmTyHigh,
170 ImmTyBLGP,
171 ImmTyCBSZ,
172 ImmTyABID,
173 ImmTyEndpgm,
174 ImmTyWaitVDST,
175 ImmTyWaitEXP,
176 ImmTyWaitVAVDst,
177 ImmTyWaitVMVSrc,
178 ImmTyBitOp3,
179 ImmTyMatrixAFMT,
180 ImmTyMatrixBFMT,
181 ImmTyMatrixAScale,
182 ImmTyMatrixBScale,
183 ImmTyMatrixAScaleFmt,
184 ImmTyMatrixBScaleFmt,
185 ImmTyMatrixAReuse,
186 ImmTyMatrixBReuse,
187 ImmTyScaleSel,
188 ImmTyByteSel,
189 };
190
191 // Immediate operand kind.
192 // It helps to identify the location of an offending operand after an error.
193 // Note that regular literals and mandatory literals (KImm) must be handled
194 // differently. When looking for an offending operand, we should usually
195 // ignore mandatory literals because they are part of the instruction and
196 // cannot be changed. Report location of mandatory operands only for VOPD,
197 // when both OpX and OpY have a KImm and there are no other literals.
198 enum ImmKindTy {
199 ImmKindTyNone,
200 ImmKindTyLiteral,
201 ImmKindTyMandatoryLiteral,
202 ImmKindTyConst,
203 };
204
205private:
206 struct TokOp {
207 const char *Data;
208 unsigned Length;
209 };
210
211 struct ImmOp {
212 int64_t Val;
213 ImmTy Type;
214 bool IsFPImm;
215 mutable ImmKindTy Kind;
216 Modifiers Mods;
217 };
218
219 struct RegOp {
220 MCRegister RegNo;
221 Modifiers Mods;
222 };
223
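 // Exactly one member of this union is active at a time, selected by Kind
 // (Token, Immediate, Register or Expression).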
224 union {
225 TokOp Tok;
226 ImmOp Imm;
227 RegOp Reg;
228 const MCExpr *Expr;
229 };
230
231public:
232 bool isToken() const override { return Kind == Token; }
233
234 bool isSymbolRefExpr() const {
235 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
236 }
237
238 bool isImm() const override {
239 return Kind == Immediate;
240 }
241
242 void setImmKindNone() const {
243 assert(isImm());
244 Imm.Kind = ImmKindTyNone;
245 }
246
247 void setImmKindLiteral() const {
248 assert(isImm());
249 Imm.Kind = ImmKindTyLiteral;
250 }
251
252 void setImmKindMandatoryLiteral() const {
253 assert(isImm());
254 Imm.Kind = ImmKindTyMandatoryLiteral;
255 }
256
257 void setImmKindConst() const {
258 assert(isImm());
259 Imm.Kind = ImmKindTyConst;
260 }
261
262 bool IsImmKindLiteral() const {
263 return isImm() && Imm.Kind == ImmKindTyLiteral;
264 }
265
266 bool IsImmKindMandatoryLiteral() const {
267 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
268 }
269
270 bool isImmKindConst() const {
271 return isImm() && Imm.Kind == ImmKindTyConst;
272 }
273
274 bool isInlinableImm(MVT type) const;
275 bool isLiteralImm(MVT type) const;
276
277 bool isRegKind() const {
278 return Kind == Register;
279 }
280
281 bool isReg() const override {
282 return isRegKind() && !hasModifiers();
283 }
284
285 bool isRegOrInline(unsigned RCID, MVT type) const {
286 return isRegClass(RCID) || isInlinableImm(type);
287 }
288
289 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
290 return isRegOrInline(RCID, type) || isLiteralImm(type);
291 }
292
293 bool isRegOrImmWithInt16InputMods() const {
294 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
295 }
296
297 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
 298 return isRegOrImmWithInputMods(
 299 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
300 }
301
302 bool isRegOrImmWithInt32InputMods() const {
303 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
304 }
305
306 bool isRegOrInlineImmWithInt16InputMods() const {
307 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
308 }
309
310 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
311 return isRegOrInline(
312 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
313 }
314
315 bool isRegOrInlineImmWithInt32InputMods() const {
316 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
317 }
318
319 bool isRegOrImmWithInt64InputMods() const {
320 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
321 }
322
323 bool isRegOrImmWithFP16InputMods() const {
324 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
325 }
326
327 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
 328 return isRegOrImmWithInputMods(
 329 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
330 }
331
332 bool isRegOrImmWithFP32InputMods() const {
333 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
334 }
335
336 bool isRegOrImmWithFP64InputMods() const {
337 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
338 }
339
340 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
341 return isRegOrInline(
342 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
343 }
344
345 bool isRegOrInlineImmWithFP32InputMods() const {
346 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
347 }
348
349 bool isRegOrInlineImmWithFP64InputMods() const {
350 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
351 }
352
353 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
354
355 bool isVRegWithFP32InputMods() const {
356 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
357 }
358
359 bool isVRegWithFP64InputMods() const {
360 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
361 }
362
363 bool isPackedFP16InputMods() const {
364 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
365 }
366
367 bool isPackedVGPRFP32InputMods() const {
368 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
369 }
370
371 bool isVReg() const {
372 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
373 isRegClass(AMDGPU::VReg_64RegClassID) ||
374 isRegClass(AMDGPU::VReg_96RegClassID) ||
375 isRegClass(AMDGPU::VReg_128RegClassID) ||
376 isRegClass(AMDGPU::VReg_160RegClassID) ||
377 isRegClass(AMDGPU::VReg_192RegClassID) ||
378 isRegClass(AMDGPU::VReg_256RegClassID) ||
379 isRegClass(AMDGPU::VReg_512RegClassID) ||
380 isRegClass(AMDGPU::VReg_1024RegClassID);
381 }
382
383 bool isVReg32() const {
384 return isRegClass(AMDGPU::VGPR_32RegClassID);
385 }
386
387 bool isVReg32OrOff() const {
388 return isOff() || isVReg32();
389 }
390
391 bool isNull() const {
392 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
393 }
394
395 bool isVRegWithInputMods() const;
396 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
397 template <bool IsFake16> bool isT16VRegWithInputMods() const;
398
399 bool isSDWAOperand(MVT type) const;
400 bool isSDWAFP16Operand() const;
401 bool isSDWAFP32Operand() const;
402 bool isSDWAInt16Operand() const;
403 bool isSDWAInt32Operand() const;
404
405 bool isImmTy(ImmTy ImmT) const {
406 return isImm() && Imm.Type == ImmT;
407 }
408
409 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
410
411 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
412
413 bool isImmModifier() const {
414 return isImm() && Imm.Type != ImmTyNone;
415 }
416
417 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
418 bool isDim() const { return isImmTy(ImmTyDim); }
419 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
420 bool isOff() const { return isImmTy(ImmTyOff); }
421 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
422 bool isOffen() const { return isImmTy(ImmTyOffen); }
423 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
424 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
425 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
426 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
427 bool isGDS() const { return isImmTy(ImmTyGDS); }
428 bool isLDS() const { return isImmTy(ImmTyLDS); }
429 bool isCPol() const { return isImmTy(ImmTyCPol); }
430 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
431 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
432 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
433 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
434 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
435 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
436 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
437 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
438 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
439 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
440 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
441 bool isTFE() const { return isImmTy(ImmTyTFE); }
442 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
443 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
444 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
445 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
446 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
447 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
448 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
449 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
450 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
451 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
452 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
453 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
454 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
455 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
456
457 bool isRegOrImm() const {
458 return isReg() || isImm();
459 }
460
461 bool isRegClass(unsigned RCID) const;
462
463 bool isInlineValue() const;
464
465 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
466 return isRegOrInline(RCID, type) && !hasModifiers();
467 }
468
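 // Operand-class predicates used by the generated asm matcher. As the checks
 // below show, the naming convention is: SCSrc = SGPR or inline constant,
 // VCSrc = VGPR/SGPR or inline constant, VSrc = VCSrc or a literal, and
 // VISrc/AISrc = VGPR/AGPR or inline constant.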
469 bool isSCSrcB16() const {
470 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
471 }
472
473 bool isSCSrcV2B16() const {
474 return isSCSrcB16();
475 }
476
477 bool isSCSrc_b32() const {
478 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
479 }
480
481 bool isSCSrc_b64() const {
482 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
483 }
484
485 bool isBoolReg() const;
486
487 bool isSCSrcF16() const {
488 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
489 }
490
491 bool isSCSrcV2F16() const {
492 return isSCSrcF16();
493 }
494
495 bool isSCSrcF32() const {
496 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
497 }
498
499 bool isSCSrcF64() const {
500 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
501 }
502
503 bool isSSrc_b32() const {
504 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
505 }
506
507 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
508
509 bool isSSrcV2B16() const {
510 llvm_unreachable("cannot happen");
511 return isSSrc_b16();
512 }
513
514 bool isSSrc_b64() const {
515 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
516 // See isVSrc64().
517 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
518 (((const MCTargetAsmParser *)AsmParser)
519 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
520 isExpr());
521 }
522
523 bool isSSrc_f32() const {
524 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
525 }
526
527 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
528
529 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
530
531 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
532
533 bool isSSrcV2F16() const {
534 llvm_unreachable("cannot happen");
535 return isSSrc_f16();
536 }
537
538 bool isSSrcV2FP32() const {
539 llvm_unreachable("cannot happen");
540 return isSSrc_f32();
541 }
542
543 bool isSCSrcV2FP32() const {
544 llvm_unreachable("cannot happen");
545 return isSCSrcF32();
546 }
547
548 bool isSSrcV2INT32() const {
549 llvm_unreachable("cannot happen");
550 return isSSrc_b32();
551 }
552
553 bool isSCSrcV2INT32() const {
554 llvm_unreachable("cannot happen");
555 return isSCSrc_b32();
556 }
557
558 bool isSSrcOrLds_b32() const {
559 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
560 isLiteralImm(MVT::i32) || isExpr();
561 }
562
563 bool isVCSrc_b32() const {
564 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
565 }
566
567 bool isVCSrc_b64() const {
568 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
569 }
570
571 bool isVCSrcT_b16() const {
572 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
573 }
574
575 bool isVCSrcTB16_Lo128() const {
576 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
577 }
578
579 bool isVCSrcFake16B16_Lo128() const {
580 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
581 }
582
583 bool isVCSrc_b16() const {
584 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
585 }
586
587 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
588
589 bool isVCSrc_f32() const {
590 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
591 }
592
593 bool isVCSrc_f64() const {
594 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
595 }
596
597 bool isVCSrcTBF16() const {
598 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
599 }
600
601 bool isVCSrcT_f16() const {
602 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
603 }
604
605 bool isVCSrcT_bf16() const {
606 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
607 }
608
609 bool isVCSrcTBF16_Lo128() const {
610 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
611 }
612
613 bool isVCSrcTF16_Lo128() const {
614 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
615 }
616
617 bool isVCSrcFake16BF16_Lo128() const {
618 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
619 }
620
621 bool isVCSrcFake16F16_Lo128() const {
622 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
623 }
624
625 bool isVCSrc_bf16() const {
626 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
627 }
628
629 bool isVCSrc_f16() const {
630 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
631 }
632
633 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
634
635 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
636
637 bool isVSrc_b32() const {
638 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
639 }
640
641 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
642
643 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
644
645 bool isVSrcT_b16_Lo128() const {
646 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
647 }
648
649 bool isVSrcFake16_b16_Lo128() const {
650 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
651 }
652
653 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
654
655 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
656
657 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
658
659 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
660
661 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
662
663 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
664
665 bool isVSrc_f32() const {
666 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
667 }
668
669 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
670
671 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
672
673 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
674
675 bool isVSrcT_bf16_Lo128() const {
676 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
677 }
678
679 bool isVSrcT_f16_Lo128() const {
680 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
681 }
682
683 bool isVSrcFake16_bf16_Lo128() const {
684 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
685 }
686
687 bool isVSrcFake16_f16_Lo128() const {
688 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
689 }
690
691 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
692
693 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
694
695 bool isVSrc_v2bf16() const {
696 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
697 }
698
699 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
700
701 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
702
703 bool isVISrcB32() const {
704 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
705 }
706
707 bool isVISrcB16() const {
708 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
709 }
710
711 bool isVISrcV2B16() const {
712 return isVISrcB16();
713 }
714
715 bool isVISrcF32() const {
716 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
717 }
718
719 bool isVISrcF16() const {
720 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
721 }
722
723 bool isVISrcV2F16() const {
724 return isVISrcF16() || isVISrcB32();
725 }
726
727 bool isVISrc_64_bf16() const {
728 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
729 }
730
731 bool isVISrc_64_f16() const {
732 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
733 }
734
735 bool isVISrc_64_b32() const {
736 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
737 }
738
739 bool isVISrc_64B64() const {
740 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
741 }
742
743 bool isVISrc_64_f64() const {
744 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
745 }
746
747 bool isVISrc_64V2FP32() const {
748 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
749 }
750
751 bool isVISrc_64V2INT32() const {
752 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
753 }
754
755 bool isVISrc_256_b32() const {
756 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
757 }
758
759 bool isVISrc_256_f32() const {
760 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
761 }
762
763 bool isVISrc_256B64() const {
764 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
765 }
766
767 bool isVISrc_256_f64() const {
768 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
769 }
770
771 bool isVISrc_512_f64() const {
772 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
773 }
774
775 bool isVISrc_128B16() const {
776 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
777 }
778
779 bool isVISrc_128V2B16() const {
780 return isVISrc_128B16();
781 }
782
783 bool isVISrc_128_b32() const {
784 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
785 }
786
787 bool isVISrc_128_f32() const {
788 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
789 }
790
791 bool isVISrc_256V2FP32() const {
792 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
793 }
794
795 bool isVISrc_256V2INT32() const {
796 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
797 }
798
799 bool isVISrc_512_b32() const {
800 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
801 }
802
803 bool isVISrc_512B16() const {
804 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
805 }
806
807 bool isVISrc_512V2B16() const {
808 return isVISrc_512B16();
809 }
810
811 bool isVISrc_512_f32() const {
812 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
813 }
814
815 bool isVISrc_512F16() const {
816 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
817 }
818
819 bool isVISrc_512V2F16() const {
820 return isVISrc_512F16() || isVISrc_512_b32();
821 }
822
823 bool isVISrc_1024_b32() const {
824 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
825 }
826
827 bool isVISrc_1024B16() const {
828 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
829 }
830
831 bool isVISrc_1024V2B16() const {
832 return isVISrc_1024B16();
833 }
834
835 bool isVISrc_1024_f32() const {
836 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
837 }
838
839 bool isVISrc_1024F16() const {
840 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
841 }
842
843 bool isVISrc_1024V2F16() const {
844 return isVISrc_1024F16() || isVISrc_1024_b32();
845 }
846
847 bool isAISrcB32() const {
848 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
849 }
850
851 bool isAISrcB16() const {
852 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
853 }
854
855 bool isAISrcV2B16() const {
856 return isAISrcB16();
857 }
858
859 bool isAISrcF32() const {
860 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
861 }
862
863 bool isAISrcF16() const {
864 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
865 }
866
867 bool isAISrcV2F16() const {
868 return isAISrcF16() || isAISrcB32();
869 }
870
871 bool isAISrc_64B64() const {
872 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
873 }
874
875 bool isAISrc_64_f64() const {
876 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
877 }
878
879 bool isAISrc_128_b32() const {
880 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
881 }
882
883 bool isAISrc_128B16() const {
884 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
885 }
886
887 bool isAISrc_128V2B16() const {
888 return isAISrc_128B16();
889 }
890
891 bool isAISrc_128_f32() const {
892 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
893 }
894
895 bool isAISrc_128F16() const {
896 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
897 }
898
899 bool isAISrc_128V2F16() const {
900 return isAISrc_128F16() || isAISrc_128_b32();
901 }
902
903 bool isVISrc_128_bf16() const {
904 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
905 }
906
907 bool isVISrc_128_f16() const {
908 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
909 }
910
911 bool isVISrc_128V2F16() const {
912 return isVISrc_128_f16() || isVISrc_128_b32();
913 }
914
915 bool isAISrc_256B64() const {
916 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
917 }
918
919 bool isAISrc_256_f64() const {
920 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
921 }
922
923 bool isAISrc_512_b32() const {
924 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
925 }
926
927 bool isAISrc_512B16() const {
928 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
929 }
930
931 bool isAISrc_512V2B16() const {
932 return isAISrc_512B16();
933 }
934
935 bool isAISrc_512_f32() const {
936 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
937 }
938
939 bool isAISrc_512F16() const {
940 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
941 }
942
943 bool isAISrc_512V2F16() const {
944 return isAISrc_512F16() || isAISrc_512_b32();
945 }
946
947 bool isAISrc_1024_b32() const {
948 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
949 }
950
951 bool isAISrc_1024B16() const {
952 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
953 }
954
955 bool isAISrc_1024V2B16() const {
956 return isAISrc_1024B16();
957 }
958
959 bool isAISrc_1024_f32() const {
960 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
961 }
962
963 bool isAISrc_1024F16() const {
964 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
965 }
966
967 bool isAISrc_1024V2F16() const {
968 return isAISrc_1024F16() || isAISrc_1024_b32();
969 }
970
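 // KImm operands are mandatory literals that are part of the instruction
 // encoding (see ImmKindTyMandatoryLiteral above), so only a literal
 // immediate of the matching type is accepted.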
971 bool isKImmFP32() const {
972 return isLiteralImm(MVT::f32);
973 }
974
975 bool isKImmFP16() const {
976 return isLiteralImm(MVT::f16);
977 }
978
979 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
980
981 bool isMem() const override {
982 return false;
983 }
984
985 bool isExpr() const {
986 return Kind == Expression;
987 }
988
989 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
990
991 bool isSWaitCnt() const;
992 bool isDepCtr() const;
993 bool isSDelayALU() const;
994 bool isHwreg() const;
995 bool isSendMsg() const;
996 bool isSplitBarrier() const;
997 bool isSwizzle() const;
998 bool isSMRDOffset8() const;
999 bool isSMEMOffset() const;
1000 bool isSMRDLiteralOffset() const;
1001 bool isDPP8() const;
1002 bool isDPPCtrl() const;
1003 bool isBLGP() const;
1004 bool isGPRIdxMode() const;
1005 bool isS16Imm() const;
1006 bool isU16Imm() const;
1007 bool isEndpgm() const;
1008
1009 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
1010 return [this, P]() { return P(*this); };
1011 }
1012
1013 StringRef getToken() const {
1014 assert(isToken());
1015 return StringRef(Tok.Data, Tok.Length);
1016 }
1017
1018 int64_t getImm() const {
1019 assert(isImm());
1020 return Imm.Val;
1021 }
1022
1023 void setImm(int64_t Val) {
1024 assert(isImm());
1025 Imm.Val = Val;
1026 }
1027
1028 ImmTy getImmTy() const {
1029 assert(isImm());
1030 return Imm.Type;
1031 }
1032
1033 MCRegister getReg() const override {
1034 assert(isRegKind());
1035 return Reg.RegNo;
1036 }
1037
1038 SMLoc getStartLoc() const override {
1039 return StartLoc;
1040 }
1041
1042 SMLoc getEndLoc() const override {
1043 return EndLoc;
1044 }
1045
1046 SMRange getLocRange() const {
1047 return SMRange(StartLoc, EndLoc);
1048 }
1049
1050 Modifiers getModifiers() const {
1051 assert(isRegKind() || isImmTy(ImmTyNone));
1052 return isRegKind() ? Reg.Mods : Imm.Mods;
1053 }
1054
1055 void setModifiers(Modifiers Mods) {
1056 assert(isRegKind() || isImmTy(ImmTyNone));
1057 if (isRegKind())
1058 Reg.Mods = Mods;
1059 else
1060 Imm.Mods = Mods;
1061 }
1062
1063 bool hasModifiers() const {
1064 return getModifiers().hasModifiers();
1065 }
1066
1067 bool hasFPModifiers() const {
1068 return getModifiers().hasFPModifiers();
1069 }
1070
1071 bool hasIntModifiers() const {
1072 return getModifiers().hasIntModifiers();
1073 }
1074
1075 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1076
1077 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1078
1079 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1080
1081 void addRegOperands(MCInst &Inst, unsigned N) const;
1082
1083 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1084 if (isRegKind())
1085 addRegOperands(Inst, N);
1086 else
1087 addImmOperands(Inst, N);
1088 }
1089
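 // The *WithInputMods helpers emit the modifier bitmask produced by
 // getModifiersOperand() first, followed by the source register or immediate
 // itself.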
1090 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1091 Modifiers Mods = getModifiers();
1092 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1093 if (isRegKind()) {
1094 addRegOperands(Inst, N);
1095 } else {
1096 addImmOperands(Inst, N, false);
1097 }
1098 }
1099
1100 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1101 assert(!hasIntModifiers());
1102 addRegOrImmWithInputModsOperands(Inst, N);
1103 }
1104
1105 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1106 assert(!hasFPModifiers());
1107 addRegOrImmWithInputModsOperands(Inst, N);
1108 }
1109
1110 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1111 Modifiers Mods = getModifiers();
1112 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1113 assert(isRegKind());
1114 addRegOperands(Inst, N);
1115 }
1116
1117 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1118 assert(!hasIntModifiers());
1119 addRegWithInputModsOperands(Inst, N);
1120 }
1121
1122 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1123 assert(!hasFPModifiers());
1124 addRegWithInputModsOperands(Inst, N);
1125 }
1126
1127 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1128 // clang-format off
1129 switch (Type) {
1130 case ImmTyNone: OS << "None"; break;
1131 case ImmTyGDS: OS << "GDS"; break;
1132 case ImmTyLDS: OS << "LDS"; break;
1133 case ImmTyOffen: OS << "Offen"; break;
1134 case ImmTyIdxen: OS << "Idxen"; break;
1135 case ImmTyAddr64: OS << "Addr64"; break;
1136 case ImmTyOffset: OS << "Offset"; break;
1137 case ImmTyInstOffset: OS << "InstOffset"; break;
1138 case ImmTyOffset0: OS << "Offset0"; break;
1139 case ImmTyOffset1: OS << "Offset1"; break;
1140 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1141 case ImmTyCPol: OS << "CPol"; break;
1142 case ImmTyIndexKey8bit: OS << "index_key"; break;
1143 case ImmTyIndexKey16bit: OS << "index_key"; break;
1144 case ImmTyIndexKey32bit: OS << "index_key"; break;
1145 case ImmTyTFE: OS << "TFE"; break;
1146 case ImmTyD16: OS << "D16"; break;
1147 case ImmTyFORMAT: OS << "FORMAT"; break;
1148 case ImmTyClamp: OS << "Clamp"; break;
1149 case ImmTyOModSI: OS << "OModSI"; break;
1150 case ImmTyDPP8: OS << "DPP8"; break;
1151 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1152 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1153 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1154 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1155 case ImmTyDppFI: OS << "DppFI"; break;
1156 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1157 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1158 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1159 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1160 case ImmTyDMask: OS << "DMask"; break;
1161 case ImmTyDim: OS << "Dim"; break;
1162 case ImmTyUNorm: OS << "UNorm"; break;
1163 case ImmTyDA: OS << "DA"; break;
1164 case ImmTyR128A16: OS << "R128A16"; break;
1165 case ImmTyA16: OS << "A16"; break;
1166 case ImmTyLWE: OS << "LWE"; break;
1167 case ImmTyOff: OS << "Off"; break;
1168 case ImmTyExpTgt: OS << "ExpTgt"; break;
1169 case ImmTyExpCompr: OS << "ExpCompr"; break;
1170 case ImmTyExpVM: OS << "ExpVM"; break;
1171 case ImmTyHwreg: OS << "Hwreg"; break;
1172 case ImmTySendMsg: OS << "SendMsg"; break;
1173 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1174 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1175 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1176 case ImmTyOpSel: OS << "OpSel"; break;
1177 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1178 case ImmTyNegLo: OS << "NegLo"; break;
1179 case ImmTyNegHi: OS << "NegHi"; break;
1180 case ImmTySwizzle: OS << "Swizzle"; break;
1181 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1182 case ImmTyHigh: OS << "High"; break;
1183 case ImmTyBLGP: OS << "BLGP"; break;
1184 case ImmTyCBSZ: OS << "CBSZ"; break;
1185 case ImmTyABID: OS << "ABID"; break;
1186 case ImmTyEndpgm: OS << "Endpgm"; break;
1187 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1188 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1189 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1190 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1191 case ImmTyBitOp3: OS << "BitOp3"; break;
1192 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1193 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1194 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1195 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1196 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1197 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1198 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1199 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1200 case ImmTyScaleSel: OS << "ScaleSel" ; break;
1201 case ImmTyByteSel: OS << "ByteSel" ; break;
1202 }
1203 // clang-format on
1204 }
1205
1206 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1207 switch (Kind) {
1208 case Register:
1209 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1210 << " mods: " << Reg.Mods << '>';
1211 break;
1212 case Immediate:
1213 OS << '<' << getImm();
1214 if (getImmTy() != ImmTyNone) {
1215 OS << " type: "; printImmTy(OS, getImmTy());
1216 }
1217 OS << " mods: " << Imm.Mods << '>';
1218 break;
1219 case Token:
1220 OS << '\'' << getToken() << '\'';
1221 break;
1222 case Expression:
1223 OS << "<expr ";
1224 MAI.printExpr(OS, *Expr);
1225 OS << '>';
1226 break;
1227 }
1228 }
1229
1230 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1231 int64_t Val, SMLoc Loc,
1232 ImmTy Type = ImmTyNone,
1233 bool IsFPImm = false) {
1234 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1235 Op->Imm.Val = Val;
1236 Op->Imm.IsFPImm = IsFPImm;
1237 Op->Imm.Kind = ImmKindTyNone;
1238 Op->Imm.Type = Type;
1239 Op->Imm.Mods = Modifiers();
1240 Op->StartLoc = Loc;
1241 Op->EndLoc = Loc;
1242 return Op;
1243 }
1244
1245 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1246 StringRef Str, SMLoc Loc,
1247 bool HasExplicitEncodingSize = true) {
1248 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1249 Res->Tok.Data = Str.data();
1250 Res->Tok.Length = Str.size();
1251 Res->StartLoc = Loc;
1252 Res->EndLoc = Loc;
1253 return Res;
1254 }
1255
1256 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1257 MCRegister Reg, SMLoc S, SMLoc E) {
1258 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1259 Op->Reg.RegNo = Reg;
1260 Op->Reg.Mods = Modifiers();
1261 Op->StartLoc = S;
1262 Op->EndLoc = E;
1263 return Op;
1264 }
1265
1266 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1267 const class MCExpr *Expr, SMLoc S) {
1268 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1269 Op->Expr = Expr;
1270 Op->StartLoc = S;
1271 Op->EndLoc = S;
1272 return Op;
1273 }
1274};
1275
1276raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1277 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1278 return OS;
1279}
1280
1281//===----------------------------------------------------------------------===//
1282// AsmParser
1283//===----------------------------------------------------------------------===//
1284
1285// Holds info related to the current kernel, e.g. count of SGPRs used.
1286// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1287// .amdgpu_hsa_kernel or at EOF.
1288class KernelScopeInfo {
1289 int SgprIndexUnusedMin = -1;
1290 int VgprIndexUnusedMin = -1;
1291 int AgprIndexUnusedMin = -1;
1292 MCContext *Ctx = nullptr;
1293 MCSubtargetInfo const *MSTI = nullptr;
1294
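 // Track the highest register index referenced so far and publish the counts
 // through the .kernel.sgpr_count / .kernel.vgpr_count / .kernel.agpr_count
 // assembler symbols.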
1295 void usesSgprAt(int i) {
1296 if (i >= SgprIndexUnusedMin) {
1297 SgprIndexUnusedMin = ++i;
1298 if (Ctx) {
1299 MCSymbol* const Sym =
1300 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1301 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1302 }
1303 }
1304 }
1305
1306 void usesVgprAt(int i) {
1307 if (i >= VgprIndexUnusedMin) {
1308 VgprIndexUnusedMin = ++i;
1309 if (Ctx) {
1310 MCSymbol* const Sym =
1311 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1312 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1313 VgprIndexUnusedMin);
1314 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1315 }
1316 }
1317 }
1318
1319 void usesAgprAt(int i) {
1320 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1321 if (!hasMAIInsts(*MSTI))
1322 return;
1323
1324 if (i >= AgprIndexUnusedMin) {
1325 AgprIndexUnusedMin = ++i;
1326 if (Ctx) {
1327 MCSymbol* const Sym =
1328 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1329 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1330
1331 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1332 MCSymbol* const vSym =
1333 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1334 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1335 VgprIndexUnusedMin);
1336 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1337 }
1338 }
1339 }
1340
1341public:
1342 KernelScopeInfo() = default;
1343
1344 void initialize(MCContext &Context) {
1345 Ctx = &Context;
1346 MSTI = Ctx->getSubtargetInfo();
1347
1348 usesSgprAt(SgprIndexUnusedMin = -1);
1349 usesVgprAt(VgprIndexUnusedMin = -1);
1350 if (hasMAIInsts(*MSTI)) {
1351 usesAgprAt(AgprIndexUnusedMin = -1);
1352 }
1353 }
1354
1355 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1356 unsigned RegWidth) {
1357 switch (RegKind) {
1358 case IS_SGPR:
1359 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1360 break;
1361 case IS_AGPR:
1362 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1363 break;
1364 case IS_VGPR:
1365 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1366 break;
1367 default:
1368 break;
1369 }
1370 }
1371};
1372
1373class AMDGPUAsmParser : public MCTargetAsmParser {
1374 MCAsmParser &Parser;
1375
1376 unsigned ForcedEncodingSize = 0;
1377 bool ForcedDPP = false;
1378 bool ForcedSDWA = false;
1379 KernelScopeInfo KernelScope;
1380
1381 /// @name Auto-generated Match Functions
1382 /// {
1383
1384#define GET_ASSEMBLER_HEADER
1385#include "AMDGPUGenAsmMatcher.inc"
1386
1387 /// }
1388
1389private:
1390 void createConstantSymbol(StringRef Id, int64_t Val);
1391
1392 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1393 bool OutOfRangeError(SMRange Range);
1394 /// Calculate VGPR/SGPR blocks required for given target, reserved
1395 /// registers, and user-specified NextFreeXGPR values.
1396 ///
1397 /// \param Features [in] Target features, used for bug corrections.
1398 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1399 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1400 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1401 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1402 /// descriptor field, if valid.
1403 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1404 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1405 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1406 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1407 /// \param VGPRBlocks [out] Result VGPR block count.
1408 /// \param SGPRBlocks [out] Result SGPR block count.
1409 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1410 const MCExpr *FlatScrUsed, bool XNACKUsed,
1411 std::optional<bool> EnableWavefrontSize32,
1412 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1413 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1414 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1415 bool ParseDirectiveAMDGCNTarget();
1416 bool ParseDirectiveAMDHSACodeObjectVersion();
1417 bool ParseDirectiveAMDHSAKernel();
1418 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1419 bool ParseDirectiveAMDKernelCodeT();
1420 // TODO: Possibly make subtargetHasRegister const.
1421 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1422 bool ParseDirectiveAMDGPUHsaKernel();
1423
1424 bool ParseDirectiveISAVersion();
1425 bool ParseDirectiveHSAMetadata();
1426 bool ParseDirectivePALMetadataBegin();
1427 bool ParseDirectivePALMetadata();
1428 bool ParseDirectiveAMDGPULDS();
1429
1430 /// Common code to parse out a block of text (typically YAML) between start and
1431 /// end directives.
1432 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1433 const char *AssemblerDirectiveEnd,
1434 std::string &CollectString);
1435
1436 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1437 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1438 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1439 unsigned &RegNum, unsigned &RegWidth,
1440 bool RestoreOnFailure = false);
1441 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
 1442 unsigned &RegNum, unsigned &RegWidth,
 1443 SmallVectorImpl<AsmToken> &Tokens);
 1444 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
 1445 unsigned &RegWidth,
 1446 SmallVectorImpl<AsmToken> &Tokens);
 1447 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
 1448 unsigned &RegWidth,
 1449 SmallVectorImpl<AsmToken> &Tokens);
 1450 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
 1451 unsigned &RegWidth,
 1452 SmallVectorImpl<AsmToken> &Tokens);
1453 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1454 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1455 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1456
1457 bool isRegister();
1458 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1459 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1460 void initializeGprCountSymbol(RegisterKind RegKind);
1461 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1462 unsigned RegWidth);
1463 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1464 bool IsAtomic);
1465
1466public:
1467 enum OperandMode {
1468 OperandMode_Default,
1469 OperandMode_NSA,
1470 };
1471
1472 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1473
1474 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1475 const MCInstrInfo &MII,
1476 const MCTargetOptions &Options)
1477 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
 1478 MCAsmParserExtension::Initialize(Parser);
 1479
1480 if (getFeatureBits().none()) {
1481 // Set default features.
1482 copySTI().ToggleFeature("southern-islands");
1483 }
1484
1485 FeatureBitset FB = getFeatureBits();
1486 if (!FB[AMDGPU::FeatureWavefrontSize64] &&
1487 !FB[AMDGPU::FeatureWavefrontSize32]) {
1488 // If there is no default wave size it must be a generation before gfx10,
1489 // these have FeatureWavefrontSize64 in their definition already. For
1490 // gfx10+ set wave32 as a default.
1491 copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
1492 }
1493
1494 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1495
 1496 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
 1497 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1498 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1499 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1500 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1501 } else {
1502 createConstantSymbol(".option.machine_version_major", ISA.Major);
1503 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1504 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1505 }
1506 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1507 initializeGprCountSymbol(IS_VGPR);
1508 initializeGprCountSymbol(IS_SGPR);
1509 } else
1510 KernelScope.initialize(getContext());
1511
1512 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1513 createConstantSymbol(Symbol, Code);
1514
1515 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1516 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1517 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1518 }
1519
1520 bool hasMIMG_R128() const {
1521 return AMDGPU::hasMIMG_R128(getSTI());
1522 }
1523
1524 bool hasPackedD16() const {
1525 return AMDGPU::hasPackedD16(getSTI());
1526 }
1527
1528 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1529
1530 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1531
1532 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1533
1534 bool isSI() const {
1535 return AMDGPU::isSI(getSTI());
1536 }
1537
1538 bool isCI() const {
1539 return AMDGPU::isCI(getSTI());
1540 }
1541
1542 bool isVI() const {
1543 return AMDGPU::isVI(getSTI());
1544 }
1545
1546 bool isGFX9() const {
1547 return AMDGPU::isGFX9(getSTI());
1548 }
1549
 1550 // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1551 bool isGFX90A() const {
1552 return AMDGPU::isGFX90A(getSTI());
1553 }
1554
1555 bool isGFX940() const {
1556 return AMDGPU::isGFX940(getSTI());
1557 }
1558
1559 bool isGFX9Plus() const {
1560 return AMDGPU::isGFX9Plus(getSTI());
1561 }
1562
1563 bool isGFX10() const {
1564 return AMDGPU::isGFX10(getSTI());
1565 }
1566
1567 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1568
1569 bool isGFX11() const {
1570 return AMDGPU::isGFX11(getSTI());
1571 }
1572
1573 bool isGFX11Plus() const {
1574 return AMDGPU::isGFX11Plus(getSTI());
1575 }
1576
1577 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1578
1579 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1580
1581 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1582
1583 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1584
1585 bool isGFX10_BEncoding() const {
 1586 return AMDGPU::isGFX10_BEncoding(getSTI());
 1587 }
1588
1589 bool hasInv2PiInlineImm() const {
1590 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1591 }
1592
1593 bool has64BitLiterals() const {
1594 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1595 }
1596
1597 bool hasFlatOffsets() const {
1598 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1599 }
1600
1601 bool hasTrue16Insts() const {
1602 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1603 }
1604
1605 bool hasArchitectedFlatScratch() const {
1606 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1607 }
1608
1609 bool hasSGPR102_SGPR103() const {
1610 return !isVI() && !isGFX9();
1611 }
1612
1613 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1614
1615 bool hasIntClamp() const {
1616 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1617 }
1618
1619 bool hasPartialNSAEncoding() const {
1620 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1621 }
1622
1623 bool hasGloballyAddressableScratch() const {
1624 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1625 }
1626
1627 unsigned getNSAMaxSize(bool HasSampler = false) const {
1628 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1629 }
1630
1631 unsigned getMaxNumUserSGPRs() const {
 1632 return AMDGPU::getMaxNumUserSGPRs(getSTI());
 1633 }
1634
1635 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1636
1637 AMDGPUTargetStreamer &getTargetStreamer() {
 1638 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
 1639 return static_cast<AMDGPUTargetStreamer &>(TS);
1640 }
1641
1642 const MCRegisterInfo *getMRI() const {
1643 // We need this const_cast because for some reason getContext() is not const
1644 // in MCAsmParser.
1645 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1646 }
1647
1648 const MCInstrInfo *getMII() const {
1649 return &MII;
1650 }
1651
1652 const FeatureBitset &getFeatureBits() const {
1653 return getSTI().getFeatureBits();
1654 }
1655
1656 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1657 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1658 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1659
1660 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1661 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1662 bool isForcedDPP() const { return ForcedDPP; }
1663 bool isForcedSDWA() const { return ForcedSDWA; }
1664 ArrayRef<unsigned> getMatchedVariants() const;
1665 StringRef getMatchedVariantName() const;
1666
1667 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1668 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1669 bool RestoreOnFailure);
1670 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
 1671 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
 1672 SMLoc &EndLoc) override;
1673 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
 1674 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
 1675 unsigned Kind) override;
1676 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
 1677 OperandVector &Operands, MCStreamer &Out,
 1678 uint64_t &ErrorInfo,
 1679 bool MatchingInlineAsm) override;
1680 bool ParseDirective(AsmToken DirectiveID) override;
1681 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1682 OperandMode Mode = OperandMode_Default);
1683 StringRef parseMnemonicSuffix(StringRef Name);
 1684 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
 1685 SMLoc NameLoc, OperandVector &Operands) override;
1686 //bool ProcessInstruction(MCInst &Inst);
1687
1689
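 // Generic helpers for parsing "prefix:value" style operands shared by many
 // instruction formats.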
1690 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1691
 1692 ParseStatus
 1693 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1694 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1695 std::function<bool(int64_t &)> ConvertResult = nullptr);
1696
1697 ParseStatus parseOperandArrayWithPrefix(
1698 const char *Prefix, OperandVector &Operands,
1699 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1700 bool (*ConvertResult)(int64_t &) = nullptr);
1701
 1702 ParseStatus
 1703 parseNamedBit(StringRef Name, OperandVector &Operands,
1704 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1705 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1707 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1708 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1709 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1710 SMLoc &StringLoc);
1711 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
 1712 StringRef Name,
 1713 ArrayRef<const char *> Ids,
 1714 int64_t &IntVal);
1715 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
 1716 StringRef Name,
 1717 ArrayRef<const char *> Ids,
 1718 AMDGPUOperand::ImmTy Type);
1719
1720 bool isModifier();
1721 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1722 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1723 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1724 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1725 bool parseSP3NegModifier();
1726 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1727 bool HasLit = false, bool HasLit64 = false);
1729 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1730 bool HasLit = false, bool HasLit64 = false);
1731 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1732 bool AllowImm = true);
1733 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1734 bool AllowImm = true);
1735 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1736 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1737 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1738 ParseStatus tryParseIndexKey(OperandVector &Operands,
1739 AMDGPUOperand::ImmTy ImmTy);
1740 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1741 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1742 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1743 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1744 AMDGPUOperand::ImmTy Type);
1745 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1746 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1747 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1748 AMDGPUOperand::ImmTy Type);
1749 ParseStatus parseMatrixAScale(OperandVector &Operands);
1750 ParseStatus parseMatrixBScale(OperandVector &Operands);
1751 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1752 AMDGPUOperand::ImmTy Type);
1753 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1754 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1755
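 // Buffer format operand parsing: either separate dfmt/nfmt fields or the
 // unified format, specified numerically or symbolically.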
1756 ParseStatus parseDfmtNfmt(int64_t &Format);
1757 ParseStatus parseUfmt(int64_t &Format);
1758 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1759 int64_t &Format);
1760 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1761 int64_t &Format);
1762 ParseStatus parseFORMAT(OperandVector &Operands);
1763 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1764 ParseStatus parseNumericFormat(int64_t &Format);
1765 ParseStatus parseFlatOffset(OperandVector &Operands);
1766 ParseStatus parseR128A16(OperandVector &Operands);
1768 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1769 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1770
1771 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1772
1773 bool parseCnt(int64_t &IntVal);
1774 ParseStatus parseSWaitCnt(OperandVector &Operands);
1775
1776 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1777 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1778 ParseStatus parseDepCtr(OperandVector &Operands);
1779
1780 bool parseDelay(int64_t &Delay);
1781 ParseStatus parseSDelayALU(OperandVector &Operands);
1782
1783 ParseStatus parseHwreg(OperandVector &Operands);
1784
1785private:
1786 struct OperandInfoTy {
1787 SMLoc Loc;
1788 int64_t Val;
1789 bool IsSymbolic = false;
1790 bool IsDefined = false;
1791
1792 OperandInfoTy(int64_t Val) : Val(Val) {}
1793 };
1794
1795 struct StructuredOpField : OperandInfoTy {
 1796 StringLiteral Id;
 1797 StringLiteral Desc;
 1798 unsigned Width;
1799 bool IsDefined = false;
1800
1801 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1802 int64_t Default)
1803 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1804 virtual ~StructuredOpField() = default;
1805
1806 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1807 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1808 return false;
1809 }
1810
1811 virtual bool validate(AMDGPUAsmParser &Parser) const {
1812 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1813 return Error(Parser, "not supported on this GPU");
1814 if (!isUIntN(Width, Val))
1815 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1816 return true;
1817 }
1818 };
1819
1820 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1821 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1822
1823 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1824 bool validateSendMsg(const OperandInfoTy &Msg,
1825 const OperandInfoTy &Op,
1826 const OperandInfoTy &Stream);
1827
1828 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1829 OperandInfoTy &Width);
1830
1831 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1832 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1833 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1834
1835 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1836 const OperandVector &Operands) const;
1837 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1838 SMLoc getRegLoc(MCRegister Reg, const OperandVector &Operands) const;
1839 SMLoc getLitLoc(const OperandVector &Operands,
1840 bool SearchMandatoryLiterals = false) const;
1841 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1842 SMLoc getConstLoc(const OperandVector &Operands) const;
1843 SMLoc getInstLoc(const OperandVector &Operands) const;
1844
1845 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1846 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1847 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1848 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1849 bool validateSOPLiteral(const MCInst &Inst) const;
1850 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1851 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1852 bool AsVOPD3);
1853 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1854 bool tryVOPD(const MCInst &Inst);
1855 bool tryVOPD3(const MCInst &Inst);
1856 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1857
1858 bool validateIntClampSupported(const MCInst &Inst);
1859 bool validateMIMGAtomicDMask(const MCInst &Inst);
1860 bool validateMIMGGatherDMask(const MCInst &Inst);
1861 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1862 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1863 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1864 bool validateMIMGD16(const MCInst &Inst);
1865 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1866 bool validateTensorR128(const MCInst &Inst);
1867 bool validateMIMGMSAA(const MCInst &Inst);
1868 bool validateOpSel(const MCInst &Inst);
1869 bool validateTrue16OpSel(const MCInst &Inst);
1870 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1871 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1872 bool validateVccOperand(MCRegister Reg) const;
1873 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1874 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1875 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1876 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1877 bool validateAGPRLdSt(const MCInst &Inst) const;
1878 bool validateVGPRAlign(const MCInst &Inst) const;
1879 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1880 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1881 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1882 bool validateDivScale(const MCInst &Inst);
1883 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1884 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1885 const SMLoc &IDLoc);
1886 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1887 const unsigned CPol);
1888 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1889 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1890 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1891 unsigned getConstantBusLimit(unsigned Opcode) const;
1892 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1893 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1894 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1895
1896 bool isSupportedMnemo(StringRef Mnemo,
1897 const FeatureBitset &FBS);
1898 bool isSupportedMnemo(StringRef Mnemo,
1899 const FeatureBitset &FBS,
1900 ArrayRef<unsigned> Variants);
1901 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1902
1903 bool isId(const StringRef Id) const;
1904 bool isId(const AsmToken &Token, const StringRef Id) const;
1905 bool isToken(const AsmToken::TokenKind Kind) const;
1906 StringRef getId() const;
1907 bool trySkipId(const StringRef Id);
1908 bool trySkipId(const StringRef Pref, const StringRef Id);
1909 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1910 bool trySkipToken(const AsmToken::TokenKind Kind);
1911 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1912 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1913 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1914
1915 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1916 AsmToken::TokenKind getTokenKind() const;
1917 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1919 StringRef getTokenStr() const;
1920 AsmToken peekToken(bool ShouldSkipSpace = true);
1921 AsmToken getToken() const;
1922 SMLoc getLoc() const;
1923 void lex();
1924
1925public:
1926 void onBeginOfFile() override;
1927 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1928
1929 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1930
1931 ParseStatus parseExpTgt(OperandVector &Operands);
1932 ParseStatus parseSendMsg(OperandVector &Operands);
1933 ParseStatus parseInterpSlot(OperandVector &Operands);
1934 ParseStatus parseInterpAttr(OperandVector &Operands);
1935 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1936 ParseStatus parseBoolReg(OperandVector &Operands);
1937
1938 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1939 const unsigned MaxVal, const Twine &ErrMsg,
1940 SMLoc &Loc);
1941 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1942 const unsigned MinVal,
1943 const unsigned MaxVal,
1944 const StringRef ErrMsg);
1945 ParseStatus parseSwizzle(OperandVector &Operands);
1946 bool parseSwizzleOffset(int64_t &Imm);
1947 bool parseSwizzleMacro(int64_t &Imm);
1948 bool parseSwizzleQuadPerm(int64_t &Imm);
1949 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1950 bool parseSwizzleBroadcast(int64_t &Imm);
1951 bool parseSwizzleSwap(int64_t &Imm);
1952 bool parseSwizzleReverse(int64_t &Imm);
1953 bool parseSwizzleFFT(int64_t &Imm);
1954 bool parseSwizzleRotate(int64_t &Imm);
1955
1956 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1957 int64_t parseGPRIdxMacro();
1958
1959 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1960 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1961
1962 ParseStatus parseOModSI(OperandVector &Operands);
1963
1964 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1965 OptionalImmIndexMap &OptionalIdx);
1966 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1967 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1968 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1969 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1970 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1971
1972 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1973 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1974 OptionalImmIndexMap &OptionalIdx);
1975 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1976 OptionalImmIndexMap &OptionalIdx);
1977
1978 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1979 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1980
1981 bool parseDimId(unsigned &Encoding);
1983 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1985 ParseStatus parseDPPCtrl(OperandVector &Operands);
1986 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1987 int64_t parseDPPCtrlSel(StringRef Ctrl);
1988 int64_t parseDPPCtrlPerm();
1989 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1990 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1991 cvtDPP(Inst, Operands, true);
1992 }
1993 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1994 bool IsDPP8 = false);
1995 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1996 cvtVOP3DPP(Inst, Operands, true);
1997 }
1998
1999 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
2000 AMDGPUOperand::ImmTy Type);
2001 ParseStatus parseSDWADstUnused(OperandVector &Operands);
2002 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
2003 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
2004 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
2005 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
2006 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
2007 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
2008 uint64_t BasicInstType,
2009 bool SkipDstVcc = false,
2010 bool SkipSrcVcc = false);
2011
2012 ParseStatus parseEndpgm(OperandVector &Operands);
2013
2015};
2016
2017} // end anonymous namespace
2018
2019 // May be called with an integer type of equivalent bitwidth.
2020static const fltSemantics *getFltSemantics(unsigned Size) {
2021 switch (Size) {
2022 case 4:
2023 return &APFloat::IEEEsingle();
2024 case 8:
2025 return &APFloat::IEEEdouble();
2026 case 2:
2027 return &APFloat::IEEEhalf();
2028 default:
2029 llvm_unreachable("unsupported fp type");
2030 }
2031}
2032
2033 static const fltSemantics *getFltSemantics(MVT VT) {
2034 return getFltSemantics(VT.getSizeInBits() / 8);
2035}
2036
2037 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
2038 switch (OperandType) {
2039 // When a floating-point immediate is used as an operand of type i16, the
2040 // 32-bit representation of the constant, truncated to the 16 LSBs, should be used.
2055 return &APFloat::IEEEsingle();
2062 return &APFloat::IEEEdouble();
2069 return &APFloat::IEEEhalf();
2074 return &APFloat::BFloat();
2075 default:
2076 llvm_unreachable("unsupported fp type");
2077 }
2078}
2079
2080//===----------------------------------------------------------------------===//
2081// Operand
2082//===----------------------------------------------------------------------===//
2083
2084static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2085 bool Lost;
2086
2087 // Convert the literal to the operand's floating-point type.
2088 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
2089 APFloat::rmNearestTiesToEven,
2090 &Lost);
2091 // We allow precision loss but not overflow or underflow
2092 if (Status != APFloat::opOK &&
2093 Lost &&
2094 ((Status & APFloat::opOverflow) != 0 ||
2095 (Status & APFloat::opUnderflow) != 0)) {
2096 return false;
2097 }
2098
2099 return true;
2100}
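// For example, converting 1.0e40 to f32 overflows and is rejected, while
// converting 0.1 to f16 merely loses precision and is still accepted.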
2101
2102static bool isSafeTruncation(int64_t Val, unsigned Size) {
2103 return isUIntN(Size, Val) || isIntN(Size, Val);
2104}
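// For example, isSafeTruncation(0xFFFF, 16) and isSafeTruncation(-1, 16) both
// hold (the value fits as an unsigned or a signed 16-bit integer,
// respectively), while isSafeTruncation(0x1FFFF, 16) fits neither and fails.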
2105
2106static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2107 if (VT.getScalarType() == MVT::i16)
2108 return isInlinableLiteral32(Val, HasInv2Pi);
2109
2110 if (VT.getScalarType() == MVT::f16)
2111 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2112
2113 assert(VT.getScalarType() == MVT::bf16);
2114
2115 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2116}
2117
2118bool AMDGPUOperand::isInlinableImm(MVT type) const {
2119
2120 // This is a hack to enable named inline values like
2121 // shared_base with both 32-bit and 64-bit operands.
2122 // Note that these values are defined as
2123 // 32-bit operands only.
2124 if (isInlineValue()) {
2125 return true;
2126 }
2127
2128 if (!isImmTy(ImmTyNone)) {
2129 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2130 return false;
2131 }
2132 // TODO: We should avoid using host floats here. It would be better to
2133 // check the float bit values, which is what a few other places do.
2134 // We've had bot failures before due to weird NaN support on MIPS hosts.
2135
2136 APInt Literal(64, Imm.Val);
2137
2138 if (Imm.IsFPImm) { // We got fp literal token
2139 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2141 AsmParser->hasInv2PiInlineImm());
2142 }
2143
2144 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2145 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2146 return false;
2147
2148 if (type.getScalarSizeInBits() == 16) {
2149 bool Lost = false;
2150 switch (type.getScalarType().SimpleTy) {
2151 default:
2152 llvm_unreachable("unknown 16-bit type");
2153 case MVT::bf16:
2154 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2155 &Lost);
2156 break;
2157 case MVT::f16:
2158 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2159 &Lost);
2160 break;
2161 case MVT::i16:
2162 FPLiteral.convert(APFloatBase::IEEEsingle(),
2163 APFloat::rmNearestTiesToEven, &Lost);
2164 break;
2165 }
2166 // We need to use the 32-bit representation here because when a
2167 // floating-point inline constant is used as an i16 operand, its 32-bit
2168 // representation will be used. We will need the 32-bit value to check
2169 // whether it is an FP inline constant.
2170 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2171 return isInlineableLiteralOp16(ImmVal, type,
2172 AsmParser->hasInv2PiInlineImm());
2173 }
2174
2175 // Check if single precision literal is inlinable
2177 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2178 AsmParser->hasInv2PiInlineImm());
2179 }
2180
2181 // We got int literal token.
2182 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2184 AsmParser->hasInv2PiInlineImm());
2185 }
2186
2187 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2188 return false;
2189 }
2190
2191 if (type.getScalarSizeInBits() == 16) {
2193 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2194 type, AsmParser->hasInv2PiInlineImm());
2195 }
2196
2198 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2199 AsmParser->hasInv2PiInlineImm());
2200}
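// For example, source operands such as 1.0, -4.0, 0.5 and small integers map
// to hardware inline constants, and 0.15915494 (1/(2*pi)) is inlinable only
// when the subtarget reports hasInv2PiInlineImm(); anything else must be
// encoded as a literal.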
2201
2202bool AMDGPUOperand::isLiteralImm(MVT type) const {
2203 // Check that this immediate can be added as literal
2204 if (!isImmTy(ImmTyNone)) {
2205 return false;
2206 }
2207
2208 bool Allow64Bit =
2209 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2210
2211 if (!Imm.IsFPImm) {
2212 // We got int literal token.
2213
2214 if (type == MVT::f64 && hasFPModifiers()) {
2215 // FP modifiers cannot be applied to int literals while preserving the same
2216 // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
2217 // ambiguity, disallow these cases.
2218 return false;
2219 }
2220
2221 unsigned Size = type.getSizeInBits();
2222 if (Size == 64) {
2223 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2224 return true;
2225 Size = 32;
2226 }
2227
2228 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2229 // types.
2230 return isSafeTruncation(Imm.Val, Size);
2231 }
2232
2233 // We got fp literal token
2234 if (type == MVT::f64) { // Expected 64-bit fp operand
2235 // We would set the low 32 bits of the literal to zero, but we accept such literals
2236 return true;
2237 }
2238
2239 if (type == MVT::i64) { // Expected 64-bit int operand
2240 // We don't allow fp literals in 64-bit integer instructions. It is
2241 // unclear how we should encode them.
2242 return false;
2243 }
2244
2245 // We allow fp literals with f16x2 operands assuming that the specified
2246 // literal goes into the lower half and the upper half is zero. We also
2247 // require that the literal may be losslessly converted to f16.
2248 //
2249 // For i16x2 operands, we assume that the specified literal is encoded as a
2250 // single-precision float. This is pretty odd, but it matches SP3 and what
2251 // happens in hardware.
2252 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2253 : (type == MVT::v2i16) ? MVT::f32
2254 : (type == MVT::v2f32) ? MVT::f32
2255 : type;
2256
2257 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2258 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2259}
2260
2261bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2262 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2263}
2264
2265bool AMDGPUOperand::isVRegWithInputMods() const {
2266 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2267 // GFX90A allows DPP on 64-bit operands.
2268 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2269 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2270}
2271
2272template <bool IsFake16>
2273bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2274 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2275 : AMDGPU::VGPR_16_Lo128RegClassID);
2276}
2277
2278template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2279 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2280 : AMDGPU::VGPR_16RegClassID);
2281}
2282
2283bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2284 if (AsmParser->isVI())
2285 return isVReg32();
2286 if (AsmParser->isGFX9Plus())
2287 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2288 return false;
2289}
2290
2291bool AMDGPUOperand::isSDWAFP16Operand() const {
2292 return isSDWAOperand(MVT::f16);
2293}
2294
2295bool AMDGPUOperand::isSDWAFP32Operand() const {
2296 return isSDWAOperand(MVT::f32);
2297}
2298
2299bool AMDGPUOperand::isSDWAInt16Operand() const {
2300 return isSDWAOperand(MVT::i16);
2301}
2302
2303bool AMDGPUOperand::isSDWAInt32Operand() const {
2304 return isSDWAOperand(MVT::i32);
2305}
2306
2307bool AMDGPUOperand::isBoolReg() const {
2308 auto FB = AsmParser->getFeatureBits();
2309 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2310 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2311}
2312
2313uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2314{
2315 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2316 assert(Size == 2 || Size == 4 || Size == 8);
2317
2318 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2319
2320 if (Imm.Mods.Abs) {
2321 Val &= ~FpSignMask;
2322 }
2323 if (Imm.Mods.Neg) {
2324 Val ^= FpSignMask;
2325 }
2326
2327 return Val;
2328}
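// For a 32-bit operand (Size == 4), FpSignMask is 0x80000000: 'abs' clears
// the sign bit and 'neg' toggles it, so negating the bit pattern of -2.0
// (0xC0000000) yields +2.0 (0x40000000).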
2329
2330void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2331 if (isExpr()) {
2333 return;
2334 }
2335
2336 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2337 Inst.getNumOperands())) {
2338 addLiteralImmOperand(Inst, Imm.Val,
2339 ApplyModifiers &
2340 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2341 } else {
2342 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2344 setImmKindNone();
2345 }
2346}
2347
2348void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2349 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2350 auto OpNum = Inst.getNumOperands();
2351 // Check that this operand accepts literals
2352 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2353
2354 if (ApplyModifiers) {
2355 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2356 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2357 Val = applyInputFPModifiers(Val, Size);
2358 }
2359
2360 APInt Literal(64, Val);
2361 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2362
2363 if (Imm.IsFPImm) { // We got fp literal token
2364 switch (OpTy) {
2370 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2371 AsmParser->hasInv2PiInlineImm())) {
2372 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2373 setImmKindConst();
2374 return;
2375 }
2376
2377 // Non-inlineable
2378 if (AMDGPU::isSISrcFPOperand(InstDesc,
2379 OpNum)) { // Expected 64-bit fp operand
2380 bool HasMandatoryLiteral =
2381 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2382 // For fp operands we check if low 32 bits are zeros
2383 if (Literal.getLoBits(32) != 0 &&
2384 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2385 !HasMandatoryLiteral) {
2386 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2387 Inst.getLoc(),
2388 "Can't encode literal as exact 64-bit floating-point operand. "
2389 "Low 32-bits will be set to zero");
2390 Val &= 0xffffffff00000000u;
2391 }
2392
2394 setImmKindLiteral();
2395 return;
2396 }
2397
2398 // We don't allow fp literals in 64-bit integer instructions. It is
2399 // unclear how we should encode them. This case should be checked earlier
2400 // in predicate methods (isLiteralImm())
2401 llvm_unreachable("fp literal in 64-bit integer instruction.");
2402
2405 setImmKindMandatoryLiteral();
2406 return;
2407
2412 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2413 // This is 1/(2*pi), which would be truncated to bf16 with a loss of
2414 // precision. The constant represents the idiomatic fp32 value of
2415 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits
2416 // cleared. Prevent the rounding below.
2417 Inst.addOperand(MCOperand::createImm(0x3e22));
2418 setImmKindLiteral();
2419 return;
2420 }
2421 [[fallthrough]];
2422
2443 bool lost;
2444 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2445 // Convert the literal to the operand's floating-point type.
2446 FPLiteral.convert(*getOpFltSemantics(OpTy),
2447 APFloat::rmNearestTiesToEven, &lost);
2448 // We allow precision loss but not overflow or underflow. This should have
2449 // been checked earlier in isLiteralImm()
2450
2451 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2452 Inst.addOperand(MCOperand::createImm(ImmVal));
2453 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2454 setImmKindMandatoryLiteral();
2455 } else {
2456 setImmKindLiteral();
2457 }
2458 return;
2459 }
2460 default:
2461 llvm_unreachable("invalid operand size");
2462 }
2463
2464 return;
2465 }
2466
2467 // We got int literal token.
2468 // Only sign extend inline immediates.
2469 switch (OpTy) {
2482 if (isSafeTruncation(Val, 32) &&
2483 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2484 AsmParser->hasInv2PiInlineImm())) {
2486 setImmKindConst();
2487 return;
2488 }
2489 [[fallthrough]];
2490
2492
2494 setImmKindLiteral();
2495 return;
2496
2499 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2501 setImmKindConst();
2502 return;
2503 }
2504
2505 // If the target doesn't support 64-bit literals, or the lit modifier is
2506 // explicitly used, the value must be truncated to its 32 LSBs. This matters
2507 // when the 32 MSBs are not zero, i.e. when the value cannot be safely
2508 // truncated to uint32_t.
2509 if (!AsmParser->has64BitLiterals() || getModifiers().Lit)
2510 Val = Lo_32(Val);
2511
2513 setImmKindLiteral();
2514 return;
2515
2519 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2521 setImmKindConst();
2522 return;
2523 }
2524
2525 // If the target doesn't support 64-bit literals, we need to use the
2526 // constant as the high 32 bits of a double-precision floating-point value.
2527 if (!AsmParser->has64BitLiterals()) {
2528 Val = static_cast<uint64_t>(Val) << 32;
2529 } else {
2530 // Now that the target supports 64-bit literals, there are still two cases
2531 // where we want to use the src_literal encoding:
2532 // 1) it is explicitly forced by the lit modifier;
2533 // 2) the value has a valid 32-bit representation (signed or unsigned) and
2534 // is not forced to 64 bits by the lit64 modifier.
2535 if (getModifiers().Lit ||
2536 (!getModifiers().Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2537 Val = static_cast<uint64_t>(Val) << 32;
2538 }
2539
2541 setImmKindLiteral();
2542 return;
2543
2546 if (isSafeTruncation(Val, 16) &&
2547 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2549 setImmKindConst();
2550 return;
2551 }
2552
2553 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2554 setImmKindLiteral();
2555 return;
2556
2559 if (isSafeTruncation(Val, 16) &&
2560 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2561 AsmParser->hasInv2PiInlineImm())) {
2563 setImmKindConst();
2564 return;
2565 }
2566
2567 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2568 setImmKindLiteral();
2569 return;
2570
2573 if (isSafeTruncation(Val, 16) &&
2574 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2575 AsmParser->hasInv2PiInlineImm())) {
2577 setImmKindConst();
2578 return;
2579 }
2580
2581 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2582 setImmKindLiteral();
2583 return;
2584
2586 assert(isSafeTruncation(Val, 16));
2587 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2589 return;
2590 }
2592 assert(isSafeTruncation(Val, 16));
2593 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2594 AsmParser->hasInv2PiInlineImm()));
2595
2597 return;
2598 }
2599
2601 assert(isSafeTruncation(Val, 16));
2602 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2603 AsmParser->hasInv2PiInlineImm()));
2604
2606 return;
2607 }
2608
2610 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2611 setImmKindMandatoryLiteral();
2612 return;
2614 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2615 setImmKindMandatoryLiteral();
2616 return;
2618 if ((isInt<32>(Val) || isUInt<32>(Val)) && !getModifiers().Lit64)
2619 Val <<= 32;
2620
2622 setImmKindMandatoryLiteral();
2623 return;
2624 default:
2625 llvm_unreachable("invalid operand size");
2626 }
2627}
2628
2629void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2630 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2631}
2632
2633bool AMDGPUOperand::isInlineValue() const {
2634 return isRegKind() && ::isInlineValue(getReg());
2635}
2636
2637//===----------------------------------------------------------------------===//
2638// AsmParser
2639//===----------------------------------------------------------------------===//
2640
2641void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2642 // TODO: make these pre-defined variables read-only.
2643 // Currently there is no suitable machinery in core llvm-mc for this.
2644 // MCSymbol::isRedefinable is intended for another purpose, and
2645 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2646 MCContext &Ctx = getContext();
2647 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2648 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2649}
2650
2651static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2652 if (Is == IS_VGPR) {
2653 switch (RegWidth) {
2654 default: return -1;
2655 case 32:
2656 return AMDGPU::VGPR_32RegClassID;
2657 case 64:
2658 return AMDGPU::VReg_64RegClassID;
2659 case 96:
2660 return AMDGPU::VReg_96RegClassID;
2661 case 128:
2662 return AMDGPU::VReg_128RegClassID;
2663 case 160:
2664 return AMDGPU::VReg_160RegClassID;
2665 case 192:
2666 return AMDGPU::VReg_192RegClassID;
2667 case 224:
2668 return AMDGPU::VReg_224RegClassID;
2669 case 256:
2670 return AMDGPU::VReg_256RegClassID;
2671 case 288:
2672 return AMDGPU::VReg_288RegClassID;
2673 case 320:
2674 return AMDGPU::VReg_320RegClassID;
2675 case 352:
2676 return AMDGPU::VReg_352RegClassID;
2677 case 384:
2678 return AMDGPU::VReg_384RegClassID;
2679 case 512:
2680 return AMDGPU::VReg_512RegClassID;
2681 case 1024:
2682 return AMDGPU::VReg_1024RegClassID;
2683 }
2684 } else if (Is == IS_TTMP) {
2685 switch (RegWidth) {
2686 default: return -1;
2687 case 32:
2688 return AMDGPU::TTMP_32RegClassID;
2689 case 64:
2690 return AMDGPU::TTMP_64RegClassID;
2691 case 128:
2692 return AMDGPU::TTMP_128RegClassID;
2693 case 256:
2694 return AMDGPU::TTMP_256RegClassID;
2695 case 512:
2696 return AMDGPU::TTMP_512RegClassID;
2697 }
2698 } else if (Is == IS_SGPR) {
2699 switch (RegWidth) {
2700 default: return -1;
2701 case 32:
2702 return AMDGPU::SGPR_32RegClassID;
2703 case 64:
2704 return AMDGPU::SGPR_64RegClassID;
2705 case 96:
2706 return AMDGPU::SGPR_96RegClassID;
2707 case 128:
2708 return AMDGPU::SGPR_128RegClassID;
2709 case 160:
2710 return AMDGPU::SGPR_160RegClassID;
2711 case 192:
2712 return AMDGPU::SGPR_192RegClassID;
2713 case 224:
2714 return AMDGPU::SGPR_224RegClassID;
2715 case 256:
2716 return AMDGPU::SGPR_256RegClassID;
2717 case 288:
2718 return AMDGPU::SGPR_288RegClassID;
2719 case 320:
2720 return AMDGPU::SGPR_320RegClassID;
2721 case 352:
2722 return AMDGPU::SGPR_352RegClassID;
2723 case 384:
2724 return AMDGPU::SGPR_384RegClassID;
2725 case 512:
2726 return AMDGPU::SGPR_512RegClassID;
2727 }
2728 } else if (Is == IS_AGPR) {
2729 switch (RegWidth) {
2730 default: return -1;
2731 case 32:
2732 return AMDGPU::AGPR_32RegClassID;
2733 case 64:
2734 return AMDGPU::AReg_64RegClassID;
2735 case 96:
2736 return AMDGPU::AReg_96RegClassID;
2737 case 128:
2738 return AMDGPU::AReg_128RegClassID;
2739 case 160:
2740 return AMDGPU::AReg_160RegClassID;
2741 case 192:
2742 return AMDGPU::AReg_192RegClassID;
2743 case 224:
2744 return AMDGPU::AReg_224RegClassID;
2745 case 256:
2746 return AMDGPU::AReg_256RegClassID;
2747 case 288:
2748 return AMDGPU::AReg_288RegClassID;
2749 case 320:
2750 return AMDGPU::AReg_320RegClassID;
2751 case 352:
2752 return AMDGPU::AReg_352RegClassID;
2753 case 384:
2754 return AMDGPU::AReg_384RegClassID;
2755 case 512:
2756 return AMDGPU::AReg_512RegClassID;
2757 case 1024:
2758 return AMDGPU::AReg_1024RegClassID;
2759 }
2760 }
2761 return -1;
2762}
2763
2764 static MCRegister getSpecialRegForName(StringRef RegName) {
2765 return StringSwitch<MCRegister>(RegName)
2766 .Case("exec", AMDGPU::EXEC)
2767 .Case("vcc", AMDGPU::VCC)
2768 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2769 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2770 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2771 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2772 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2773 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2774 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2775 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2776 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2777 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2778 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2779 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2780 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2781 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2782 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2783 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2784 .Case("m0", AMDGPU::M0)
2785 .Case("vccz", AMDGPU::SRC_VCCZ)
2786 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2787 .Case("execz", AMDGPU::SRC_EXECZ)
2788 .Case("src_execz", AMDGPU::SRC_EXECZ)
2789 .Case("scc", AMDGPU::SRC_SCC)
2790 .Case("src_scc", AMDGPU::SRC_SCC)
2791 .Case("tba", AMDGPU::TBA)
2792 .Case("tma", AMDGPU::TMA)
2793 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2794 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2795 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2796 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2797 .Case("vcc_lo", AMDGPU::VCC_LO)
2798 .Case("vcc_hi", AMDGPU::VCC_HI)
2799 .Case("exec_lo", AMDGPU::EXEC_LO)
2800 .Case("exec_hi", AMDGPU::EXEC_HI)
2801 .Case("tma_lo", AMDGPU::TMA_LO)
2802 .Case("tma_hi", AMDGPU::TMA_HI)
2803 .Case("tba_lo", AMDGPU::TBA_LO)
2804 .Case("tba_hi", AMDGPU::TBA_HI)
2805 .Case("pc", AMDGPU::PC_REG)
2806 .Case("null", AMDGPU::SGPR_NULL)
2807 .Default(AMDGPU::NoRegister);
2808}
2809
2810bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2811 SMLoc &EndLoc, bool RestoreOnFailure) {
2812 auto R = parseRegister();
2813 if (!R) return true;
2814 assert(R->isReg());
2815 RegNo = R->getReg();
2816 StartLoc = R->getStartLoc();
2817 EndLoc = R->getEndLoc();
2818 return false;
2819}
2820
2821bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2822 SMLoc &EndLoc) {
2823 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2824}
2825
2826ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2827 SMLoc &EndLoc) {
2828 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2829 bool PendingErrors = getParser().hasPendingError();
2830 getParser().clearPendingErrors();
2831 if (PendingErrors)
2832 return ParseStatus::Failure;
2833 if (Result)
2834 return ParseStatus::NoMatch;
2835 return ParseStatus::Success;
2836}
2837
2838bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2839 RegisterKind RegKind,
2840 MCRegister Reg1, SMLoc Loc) {
2841 switch (RegKind) {
2842 case IS_SPECIAL:
2843 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2844 Reg = AMDGPU::EXEC;
2845 RegWidth = 64;
2846 return true;
2847 }
2848 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2849 Reg = AMDGPU::FLAT_SCR;
2850 RegWidth = 64;
2851 return true;
2852 }
2853 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2854 Reg = AMDGPU::XNACK_MASK;
2855 RegWidth = 64;
2856 return true;
2857 }
2858 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2859 Reg = AMDGPU::VCC;
2860 RegWidth = 64;
2861 return true;
2862 }
2863 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2864 Reg = AMDGPU::TBA;
2865 RegWidth = 64;
2866 return true;
2867 }
2868 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2869 Reg = AMDGPU::TMA;
2870 RegWidth = 64;
2871 return true;
2872 }
2873 Error(Loc, "register does not fit in the list");
2874 return false;
2875 case IS_VGPR:
2876 case IS_SGPR:
2877 case IS_AGPR:
2878 case IS_TTMP:
2879 if (Reg1 != Reg + RegWidth / 32) {
2880 Error(Loc, "registers in a list must have consecutive indices");
2881 return false;
2882 }
2883 RegWidth += 32;
2884 return true;
2885 default:
2886 llvm_unreachable("unexpected register kind");
2887 }
2888}
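// For example, while parsing the list [s0, s1, s2, s3] this is called three
// times; each call checks that the next register index is contiguous
// (Reg1 == Reg + RegWidth / 32) and grows RegWidth by 32, ending at 128.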
2889
2890 struct RegInfo {
2891 StringLiteral Name;
2892 RegisterKind Kind;
2893};
2894
2895static constexpr RegInfo RegularRegisters[] = {
2896 {{"v"}, IS_VGPR},
2897 {{"s"}, IS_SGPR},
2898 {{"ttmp"}, IS_TTMP},
2899 {{"acc"}, IS_AGPR},
2900 {{"a"}, IS_AGPR},
2901};
2902
2903static bool isRegularReg(RegisterKind Kind) {
2904 return Kind == IS_VGPR ||
2905 Kind == IS_SGPR ||
2906 Kind == IS_TTMP ||
2907 Kind == IS_AGPR;
2908}
2909
2911 for (const RegInfo &Reg : RegularRegisters)
2912 if (Str.starts_with(Reg.Name))
2913 return &Reg;
2914 return nullptr;
2915}
2916
2917static bool getRegNum(StringRef Str, unsigned& Num) {
2918 return !Str.getAsInteger(10, Num);
2919}
2920
2921bool
2922AMDGPUAsmParser::isRegister(const AsmToken &Token,
2923 const AsmToken &NextToken) const {
2924
2925 // A list of consecutive registers: [s0,s1,s2,s3]
2926 if (Token.is(AsmToken::LBrac))
2927 return true;
2928
2929 if (!Token.is(AsmToken::Identifier))
2930 return false;
2931
2932 // A single register like s0 or a range of registers like s[0:1]
2933
2934 StringRef Str = Token.getString();
2935 const RegInfo *Reg = getRegularRegInfo(Str);
2936 if (Reg) {
2937 StringRef RegName = Reg->Name;
2938 StringRef RegSuffix = Str.substr(RegName.size());
2939 if (!RegSuffix.empty()) {
2940 RegSuffix.consume_back(".l");
2941 RegSuffix.consume_back(".h");
2942 unsigned Num;
2943 // A single register with an index: rXX
2944 if (getRegNum(RegSuffix, Num))
2945 return true;
2946 } else {
2947 // A range of registers: r[XX:YY].
2948 if (NextToken.is(AsmToken::LBrac))
2949 return true;
2950 }
2951 }
2952
2953 return getSpecialRegForName(Str).isValid();
2954}
2955
2956bool
2957AMDGPUAsmParser::isRegister()
2958{
2959 return isRegister(getToken(), peekToken());
2960}
2961
2962MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2963 unsigned SubReg, unsigned RegWidth,
2964 SMLoc Loc) {
2965 assert(isRegularReg(RegKind));
2966
2967 unsigned AlignSize = 1;
2968 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2969 // SGPR and TTMP registers must be aligned.
2970 // Max required alignment is 4 dwords.
2971 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2972 }
2973
2974 if (RegNum % AlignSize != 0) {
2975 Error(Loc, "invalid register alignment");
2976 return MCRegister();
2977 }
2978
2979 unsigned RegIdx = RegNum / AlignSize;
2980 int RCID = getRegClass(RegKind, RegWidth);
2981 if (RCID == -1) {
2982 Error(Loc, "invalid or unsupported register size");
2983 return MCRegister();
2984 }
2985
2986 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2987 const MCRegisterClass RC = TRI->getRegClass(RCID);
2988 if (RegIdx >= RC.getNumRegs()) {
2989 Error(Loc, "register index is out of range");
2990 return MCRegister();
2991 }
2992
2993 MCRegister Reg = RC.getRegister(RegIdx);
2994
2995 if (SubReg) {
2996 Reg = TRI->getSubReg(Reg, SubReg);
2997
2998 // Currently all regular registers have their .l and .h subregisters, so
2999 // we should never need to generate an error here.
3000 assert(Reg && "Invalid subregister!");
3001 }
3002
3003 return Reg;
3004}
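// For example, s[2:3] is accepted (RegWidth 64, AlignSize 2, index 2 is
// aligned), while s[1:2] is rejected with "invalid register alignment"
// because SGPR pairs must start at an even index. VGPRs need no alignment.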
3005
3006bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
3007 unsigned &SubReg) {
3008 int64_t RegLo, RegHi;
3009 if (!skipToken(AsmToken::LBrac, "missing register index"))
3010 return false;
3011
3012 SMLoc FirstIdxLoc = getLoc();
3013 SMLoc SecondIdxLoc;
3014
3015 if (!parseExpr(RegLo))
3016 return false;
3017
3018 if (trySkipToken(AsmToken::Colon)) {
3019 SecondIdxLoc = getLoc();
3020 if (!parseExpr(RegHi))
3021 return false;
3022 } else {
3023 RegHi = RegLo;
3024 }
3025
3026 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
3027 return false;
3028
3029 if (!isUInt<32>(RegLo)) {
3030 Error(FirstIdxLoc, "invalid register index");
3031 return false;
3032 }
3033
3034 if (!isUInt<32>(RegHi)) {
3035 Error(SecondIdxLoc, "invalid register index");
3036 return false;
3037 }
3038
3039 if (RegLo > RegHi) {
3040 Error(FirstIdxLoc, "first register index should not exceed second index");
3041 return false;
3042 }
3043
3044 if (RegHi == RegLo) {
3045 StringRef RegSuffix = getTokenStr();
3046 if (RegSuffix == ".l") {
3047 SubReg = AMDGPU::lo16;
3048 lex();
3049 } else if (RegSuffix == ".h") {
3050 SubReg = AMDGPU::hi16;
3051 lex();
3052 }
3053 }
3054
3055 Num = static_cast<unsigned>(RegLo);
3056 RegWidth = 32 * ((RegHi - RegLo) + 1);
3057
3058 return true;
3059}
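// For example, "[0:3]" yields Num = 0 and RegWidth = 128, while a single
// index such as "[5]" yields Num = 5 and RegWidth = 32 and may be followed by
// an optional ".l" or ".h" 16-bit subregister suffix.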
3060
3061MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3062 unsigned &RegNum,
3063 unsigned &RegWidth,
3064 SmallVectorImpl<AsmToken> &Tokens) {
3065 assert(isToken(AsmToken::Identifier));
3066 MCRegister Reg = getSpecialRegForName(getTokenStr());
3067 if (Reg) {
3068 RegNum = 0;
3069 RegWidth = 32;
3070 RegKind = IS_SPECIAL;
3071 Tokens.push_back(getToken());
3072 lex(); // skip register name
3073 }
3074 return Reg;
3075}
3076
3077MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3078 unsigned &RegNum,
3079 unsigned &RegWidth,
3080 SmallVectorImpl<AsmToken> &Tokens) {
3081 assert(isToken(AsmToken::Identifier));
3082 StringRef RegName = getTokenStr();
3083 auto Loc = getLoc();
3084
3085 const RegInfo *RI = getRegularRegInfo(RegName);
3086 if (!RI) {
3087 Error(Loc, "invalid register name");
3088 return MCRegister();
3089 }
3090
3091 Tokens.push_back(getToken());
3092 lex(); // skip register name
3093
3094 RegKind = RI->Kind;
3095 StringRef RegSuffix = RegName.substr(RI->Name.size());
3096 unsigned SubReg = NoSubRegister;
3097 if (!RegSuffix.empty()) {
3098 if (RegSuffix.consume_back(".l"))
3099 SubReg = AMDGPU::lo16;
3100 else if (RegSuffix.consume_back(".h"))
3101 SubReg = AMDGPU::hi16;
3102
3103 // Single 32-bit register: vXX.
3104 if (!getRegNum(RegSuffix, RegNum)) {
3105 Error(Loc, "invalid register index");
3106 return MCRegister();
3107 }
3108 RegWidth = 32;
3109 } else {
3110 // Range of registers: v[XX:YY]. ":YY" is optional.
3111 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3112 return MCRegister();
3113 }
3114
3115 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3116}
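// For example, "v255" is a single 32-bit VGPR, "v[0:1]" is a 64-bit VGPR
// pair, and "v1.h" selects the high 16-bit half of v1.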
3117
3118MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3119 unsigned &RegNum, unsigned &RegWidth,
3120 SmallVectorImpl<AsmToken> &Tokens) {
3121 MCRegister Reg;
3122 auto ListLoc = getLoc();
3123
3124 if (!skipToken(AsmToken::LBrac,
3125 "expected a register or a list of registers")) {
3126 return MCRegister();
3127 }
3128
3129 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3130
3131 auto Loc = getLoc();
3132 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3133 return MCRegister();
3134 if (RegWidth != 32) {
3135 Error(Loc, "expected a single 32-bit register");
3136 return MCRegister();
3137 }
3138
3139 for (; trySkipToken(AsmToken::Comma); ) {
3140 RegisterKind NextRegKind;
3141 MCRegister NextReg;
3142 unsigned NextRegNum, NextRegWidth;
3143 Loc = getLoc();
3144
3145 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3146 NextRegNum, NextRegWidth,
3147 Tokens)) {
3148 return MCRegister();
3149 }
3150 if (NextRegWidth != 32) {
3151 Error(Loc, "expected a single 32-bit register");
3152 return MCRegister();
3153 }
3154 if (NextRegKind != RegKind) {
3155 Error(Loc, "registers in a list must be of the same kind");
3156 return MCRegister();
3157 }
3158 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3159 return MCRegister();
3160 }
3161
3162 if (!skipToken(AsmToken::RBrac,
3163 "expected a comma or a closing square bracket")) {
3164 return MCRegister();
3165 }
3166
3167 if (isRegularReg(RegKind))
3168 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3169
3170 return Reg;
3171}
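// For example, "[s0, s1, s2, s3]" resolves to the same 128-bit register as
// "s[0:3]": every element must be a single 32-bit register of the same kind,
// and the indices must be consecutive.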
3172
3173bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3174 MCRegister &Reg, unsigned &RegNum,
3175 unsigned &RegWidth,
3176 SmallVectorImpl<AsmToken> &Tokens) {
3177 auto Loc = getLoc();
3178 Reg = MCRegister();
3179
3180 if (isToken(AsmToken::Identifier)) {
3181 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3182 if (!Reg)
3183 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3184 } else {
3185 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3186 }
3187
3188 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3189 if (!Reg) {
3190 assert(Parser.hasPendingError());
3191 return false;
3192 }
3193
3194 if (!subtargetHasRegister(*TRI, Reg)) {
3195 if (Reg == AMDGPU::SGPR_NULL) {
3196 Error(Loc, "'null' operand is not supported on this GPU");
3197 } else {
3199 " register not available on this GPU");
3200 }
3201 return false;
3202 }
3203
3204 return true;
3205}
3206
3207bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3208 MCRegister &Reg, unsigned &RegNum,
3209 unsigned &RegWidth,
3210 bool RestoreOnFailure /*=false*/) {
3211 Reg = MCRegister();
3212
3214 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3215 if (RestoreOnFailure) {
3216 while (!Tokens.empty()) {
3217 getLexer().UnLex(Tokens.pop_back_val());
3218 }
3219 }
3220 return true;
3221 }
3222 return false;
3223}
3224
3225std::optional<StringRef>
3226AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3227 switch (RegKind) {
3228 case IS_VGPR:
3229 return StringRef(".amdgcn.next_free_vgpr");
3230 case IS_SGPR:
3231 return StringRef(".amdgcn.next_free_sgpr");
3232 default:
3233 return std::nullopt;
3234 }
3235}
3236
3237void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3238 auto SymbolName = getGprCountSymbolName(RegKind);
3239 assert(SymbolName && "initializing invalid register kind");
3240 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3241 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3242 Sym->setRedefinable(true);
3243}
3244
3245bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3246 unsigned DwordRegIndex,
3247 unsigned RegWidth) {
3248 // Symbols are only defined for GCN targets
3249 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3250 return true;
3251
3252 auto SymbolName = getGprCountSymbolName(RegKind);
3253 if (!SymbolName)
3254 return true;
3255 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3256
3257 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3258 int64_t OldCount;
3259
3260 if (!Sym->isVariable())
3261 return !Error(getLoc(),
3262 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3263 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3264 return !Error(
3265 getLoc(),
3266 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3267
3268 if (OldCount <= NewMax)
3269 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3270
3271 return true;
3272}
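// For example, after parsing v[8:11] (DwordRegIndex 8, RegWidth 128) the
// .amdgcn.next_free_vgpr symbol is raised to 12 (one past the highest VGPR
// index used) unless it is already larger.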
3273
3274std::unique_ptr<AMDGPUOperand>
3275AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3276 const auto &Tok = getToken();
3277 SMLoc StartLoc = Tok.getLoc();
3278 SMLoc EndLoc = Tok.getEndLoc();
3279 RegisterKind RegKind;
3280 MCRegister Reg;
3281 unsigned RegNum, RegWidth;
3282
3283 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3284 return nullptr;
3285 }
3286 if (isHsaAbi(getSTI())) {
3287 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3288 return nullptr;
3289 } else
3290 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3291 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3292}
3293
3294ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3295 bool HasSP3AbsModifier, bool HasLit,
3296 bool HasLit64) {
3297 // TODO: add syntactic sugar for 1/(2*PI)
3298
3299 if (isRegister() || isModifier())
3300 return ParseStatus::NoMatch;
3301
3302 if (!HasLit && !HasLit64) {
3303 HasLit64 = trySkipId("lit64");
3304 HasLit = !HasLit64 && trySkipId("lit");
3305 if (HasLit || HasLit64) {
3306 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3307 return ParseStatus::Failure;
3308 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit, HasLit64);
3309 if (S.isSuccess() &&
3310 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3311 return ParseStatus::Failure;
3312 return S;
3313 }
3314 }
3315
3316 const auto& Tok = getToken();
3317 const auto& NextTok = peekToken();
3318 bool IsReal = Tok.is(AsmToken::Real);
3319 SMLoc S = getLoc();
3320 bool Negate = false;
3321
3322 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3323 lex();
3324 IsReal = true;
3325 Negate = true;
3326 }
3327
3328 AMDGPUOperand::Modifiers Mods;
3329 Mods.Lit = HasLit;
3330 Mods.Lit64 = HasLit64;
3331
3332 if (IsReal) {
3333 // Floating-point expressions are not supported.
3334 // Can only allow floating-point literals with an
3335 // optional sign.
3336
3337 StringRef Num = getTokenStr();
3338 lex();
3339
3340 APFloat RealVal(APFloat::IEEEdouble());
3341 auto roundMode = APFloat::rmNearestTiesToEven;
3342 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3343 return ParseStatus::Failure;
3344 if (Negate)
3345 RealVal.changeSign();
3346
3347 Operands.push_back(
3348 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3349 AMDGPUOperand::ImmTyNone, true));
3350 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3351 Op.setModifiers(Mods);
3352
3353 return ParseStatus::Success;
3354
3355 } else {
3356 int64_t IntVal;
3357 const MCExpr *Expr;
3358 SMLoc S = getLoc();
3359
3360 if (HasSP3AbsModifier) {
3361 // This is a workaround for handling expressions
3362 // as arguments of SP3 'abs' modifier, for example:
3363 // |1.0|
3364 // |-1|
3365 // |1+x|
3366 // This syntax is not compatible with the syntax of standard
3367 // MC expressions (due to the trailing '|').
3368 SMLoc EndLoc;
3369 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3370 return ParseStatus::Failure;
3371 } else {
3372 if (Parser.parseExpression(Expr))
3373 return ParseStatus::Failure;
3374 }
3375
3376 if (Expr->evaluateAsAbsolute(IntVal)) {
3377 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3378 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3379 Op.setModifiers(Mods);
3380 } else {
3381 if (HasLit || HasLit64)
3382 return ParseStatus::NoMatch;
3383 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3384 }
3385
3386 return ParseStatus::Success;
3387 }
3388
3389 return ParseStatus::NoMatch;
3390}
3391
3392ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3393 if (!isRegister())
3394 return ParseStatus::NoMatch;
3395
3396 if (auto R = parseRegister()) {
3397 assert(R->isReg());
3398 Operands.push_back(std::move(R));
3399 return ParseStatus::Success;
3400 }
3401 return ParseStatus::Failure;
3402}
3403
3404ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3405 bool HasSP3AbsMod, bool HasLit,
3406 bool HasLit64) {
3407 ParseStatus Res = parseReg(Operands);
3408 if (!Res.isNoMatch())
3409 return Res;
3410 if (isModifier())
3411 return ParseStatus::NoMatch;
3412 return parseImm(Operands, HasSP3AbsMod, HasLit, HasLit64);
3413}
3414
3415bool
3416AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3417 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3418 const auto &str = Token.getString();
3419 return str == "abs" || str == "neg" || str == "sext";
3420 }
3421 return false;
3422}
3423
3424bool
3425AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3426 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3427}
3428
3429bool
3430AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3431 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3432}
3433
3434bool
3435AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3436 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3437}
3438
3439// Check if this is an operand modifier or an opcode modifier
3440 // which may look like an expression but is not. We should
3441// avoid parsing these modifiers as expressions. Currently
3442// recognized sequences are:
3443// |...|
3444// abs(...)
3445// neg(...)
3446// sext(...)
3447// -reg
3448// -|...|
3449// -abs(...)
3450// name:...
3451//
3452bool
3453AMDGPUAsmParser::isModifier() {
3454
3455 AsmToken Tok = getToken();
3456 AsmToken NextToken[2];
3457 peekTokens(NextToken);
3458
3459 return isOperandModifier(Tok, NextToken[0]) ||
3460 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3461 isOpcodeModifierWithVal(Tok, NextToken[0]);
3462}
3463
3464// Check if the current token is an SP3 'neg' modifier.
3465 // Currently this modifier is allowed in the following contexts:
3466//
3467// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3468// 2. Before an 'abs' modifier: -abs(...)
3469// 3. Before an SP3 'abs' modifier: -|...|
3470//
3471// In all other cases "-" is handled as a part
3472// of an expression that follows the sign.
3473//
3474 // Note: When "-" is followed by an integer literal N, "-N" is
3475 // interpreted as integer negation rather than a
3476 // floating-point NEG modifier applied to N.
3477 // Besides being counter-intuitive, such use of the floating-point
3478 // NEG modifier would have resulted in different meanings
3479 // of integer literals used with VOP1/2/C and VOP3,
3480 // for example:
3481 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3482 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3483 // Negative fp literals with a preceding "-" are
3484 // handled likewise, for uniformity.
3485//
3486bool
3487AMDGPUAsmParser::parseSP3NegModifier() {
3488
3489 AsmToken NextToken[2];
3490 peekTokens(NextToken);
3491
3492 if (isToken(AsmToken::Minus) &&
3493 (isRegister(NextToken[0], NextToken[1]) ||
3494 NextToken[0].is(AsmToken::Pipe) ||
3495 isId(NextToken[0], "abs"))) {
3496 lex();
3497 return true;
3498 }
3499
3500 return false;
3501}
3502
3503 ParseStatus
3504 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3505 bool AllowImm) {
3506 bool Neg, SP3Neg;
3507 bool Abs, SP3Abs;
3508 bool Lit64, Lit;
3509 SMLoc Loc;
3510
3511 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3512 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3513 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3514
3515 SP3Neg = parseSP3NegModifier();
3516
3517 Loc = getLoc();
3518 Neg = trySkipId("neg");
3519 if (Neg && SP3Neg)
3520 return Error(Loc, "expected register or immediate");
3521 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3522 return ParseStatus::Failure;
3523
3524 Abs = trySkipId("abs");
3525 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3526 return ParseStatus::Failure;
3527
3528 Lit64 = trySkipId("lit64");
3529 if (Lit64) {
3530 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3531 return ParseStatus::Failure;
3532 if (!has64BitLiterals())
3533 return Error(Loc, "lit64 is not supported on this GPU");
3534 }
3535
3536 Lit = !Lit64 && trySkipId("lit");
3537 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3538 return ParseStatus::Failure;
3539
3540 Loc = getLoc();
3541 SP3Abs = trySkipToken(AsmToken::Pipe);
3542 if (Abs && SP3Abs)
3543 return Error(Loc, "expected register or immediate");
3544
3545 ParseStatus Res;
3546 if (AllowImm) {
3547 Res = parseRegOrImm(Operands, SP3Abs, Lit, Lit64);
3548 } else {
3549 Res = parseReg(Operands);
3550 }
3551 if (!Res.isSuccess())
3552 return (SP3Neg || Neg || SP3Abs || Abs || Lit || Lit64)
3553 ? ParseStatus::Failure
3554 : Res;
3555
3556 if ((Lit || Lit64) && !Operands.back()->isImm())
3557 Error(Loc, "expected immediate with lit modifier");
3558
3559 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3560 return ParseStatus::Failure;
3561 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3562 return ParseStatus::Failure;
3563 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3564 return ParseStatus::Failure;
3565 if ((Lit || Lit64) &&
3566 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3567 return ParseStatus::Failure;
3568
3569 AMDGPUOperand::Modifiers Mods;
3570 Mods.Abs = Abs || SP3Abs;
3571 Mods.Neg = Neg || SP3Neg;
3572 Mods.Lit = Lit;
3573 Mods.Lit64 = Lit64;
3574
3575 if (Mods.hasFPModifiers() || Lit || Lit64) {
3576 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3577 if (Op.isExpr())
3578 return Error(Op.getStartLoc(), "expected an absolute expression");
3579 Op.setModifiers(Mods);
3580 }
3581 return ParseStatus::Success;
3582}
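// For example, "-|v0|", "neg(abs(v0))" and "abs(1.0)" are all accepted here,
// while "--1" is rejected up front and "lit64(...)" is rejected on targets
// without 64-bit literal support.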
3583
3584 ParseStatus
3585 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3586 bool AllowImm) {
3587 bool Sext = trySkipId("sext");
3588 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3589 return ParseStatus::Failure;
3590
3591 ParseStatus Res;
3592 if (AllowImm) {
3593 Res = parseRegOrImm(Operands);
3594 } else {
3595 Res = parseReg(Operands);
3596 }
3597 if (!Res.isSuccess())
3598 return Sext ? ParseStatus::Failure : Res;
3599
3600 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3601 return ParseStatus::Failure;
3602
3603 AMDGPUOperand::Modifiers Mods;
3604 Mods.Sext = Sext;
3605
3606 if (Mods.hasIntModifiers()) {
3607 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3608 if (Op.isExpr())
3609 return Error(Op.getStartLoc(), "expected an absolute expression");
3610 Op.setModifiers(Mods);
3611 }
3612
3613 return ParseStatus::Success;
3614}
3615
3616ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3617 return parseRegOrImmWithFPInputMods(Operands, false);
3618}
3619
3620ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3621 return parseRegOrImmWithIntInputMods(Operands, false);
3622}
3623
3624ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3625 auto Loc = getLoc();
3626 if (trySkipId("off")) {
3627 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3628 AMDGPUOperand::ImmTyOff, false));
3629 return ParseStatus::Success;
3630 }
3631
3632 if (!isRegister())
3633 return ParseStatus::NoMatch;
3634
3635 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3636 if (Reg) {
3637 Operands.push_back(std::move(Reg));
3638 return ParseStatus::Success;
3639 }
3640
3641 return ParseStatus::Failure;
3642}
3643
3644unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3645 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3646
3647 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3648 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3649 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3650 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3651 return Match_InvalidOperand;
3652
3653 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3654 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3655 // v_mac_f32/16 allow only dst_sel == DWORD;
3656 auto OpNum =
3657 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3658 const auto &Op = Inst.getOperand(OpNum);
3659 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3660 return Match_InvalidOperand;
3661 }
3662 }
3663
3664 // Asm can first try to match VOPD or VOPD3. By failing early here with
3665 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3666 // Checking later during validateInstruction does not give a chance to retry
3667 // parsing as a different encoding.
3668 if (tryAnotherVOPDEncoding(Inst))
3669 return Match_InvalidOperand;
3670
3671 return Match_Success;
3672}
3673
3674static ArrayRef<unsigned> getAllVariants() {
3675 static const unsigned Variants[] = {
3676 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3677 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3678 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3679 };
3680
3681 return ArrayRef(Variants);
3682}
3683
3684// What asm variants we should check
3685ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3686 if (isForcedDPP() && isForcedVOP3()) {
3687 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3688 return ArrayRef(Variants);
3689 }
3690 if (getForcedEncodingSize() == 32) {
3691 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3692 return ArrayRef(Variants);
3693 }
3694
3695 if (isForcedVOP3()) {
3696 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3697 return ArrayRef(Variants);
3698 }
3699
3700 if (isForcedSDWA()) {
3701 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3702 AMDGPUAsmVariants::SDWA9};
3703 return ArrayRef(Variants);
3704 }
3705
3706 if (isForcedDPP()) {
3707 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3708 return ArrayRef(Variants);
3709 }
3710
3711 return getAllVariants();
3712}
3713
3714StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3715 if (isForcedDPP() && isForcedVOP3())
3716 return "e64_dpp";
3717
3718 if (getForcedEncodingSize() == 32)
3719 return "e32";
3720
3721 if (isForcedVOP3())
3722 return "e64";
3723
3724 if (isForcedSDWA())
3725 return "sdwa";
3726
3727 if (isForcedDPP())
3728 return "dpp";
3729
3730 return "";
3731}
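// The names above match the encoding suffixes that may be written on a
// mnemonic to force a particular variant, e.g. "v_add_f32_e64 v0, v1, v2"
// forces the VOP3 encoding and "v_mov_b32_dpp ..." forces DPP instead of
// letting the matcher choose.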
3732
3733unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3734 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3735 for (MCPhysReg Reg : Desc.implicit_uses()) {
3736 switch (Reg) {
3737 case AMDGPU::FLAT_SCR:
3738 case AMDGPU::VCC:
3739 case AMDGPU::VCC_LO:
3740 case AMDGPU::VCC_HI:
3741 case AMDGPU::M0:
3742 return Reg;
3743 default:
3744 break;
3745 }
3746 }
3747 return AMDGPU::NoRegister;
3748}
3749
3750// NB: This code is correct only when used to check constant
3751 // bus limitations because GFX7 supports no f16 inline constants.
3752// Note that there are no cases when a GFX7 opcode violates
3753// constant bus limitations due to the use of an f16 constant.
3754bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3755 unsigned OpIdx) const {
3756 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3757
3758 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3759 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3760 return false;
3761 }
3762
3763 const MCOperand &MO = Inst.getOperand(OpIdx);
3764
3765 int64_t Val = MO.getImm();
3766 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3767
3768 switch (OpSize) { // expected operand size
3769 case 8:
3770 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3771 case 4:
3772 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3773 case 2: {
3774 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3777 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3778
3782
3786
3790
3793 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3794
3797 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3798
3800 return false;
3801
3802 llvm_unreachable("invalid operand type");
3803 }
3804 default:
3805 llvm_unreachable("invalid operand size");
3806 }
3807}
3808
3809unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3810 if (!isGFX10Plus())
3811 return 1;
3812
3813 switch (Opcode) {
3814 // 64-bit shift instructions can use only one scalar value input
3815 case AMDGPU::V_LSHLREV_B64_e64:
3816 case AMDGPU::V_LSHLREV_B64_gfx10:
3817 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3818 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3819 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3820 case AMDGPU::V_LSHRREV_B64_e64:
3821 case AMDGPU::V_LSHRREV_B64_gfx10:
3822 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3823 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3824 case AMDGPU::V_ASHRREV_I64_e64:
3825 case AMDGPU::V_ASHRREV_I64_gfx10:
3826 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3827 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3828 case AMDGPU::V_LSHL_B64_e64:
3829 case AMDGPU::V_LSHR_B64_e64:
3830 case AMDGPU::V_ASHR_I64_e64:
3831 return 1;
3832 default:
3833 return 2;
3834 }
3835}
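// Example of the limit returned above: on GFX10+ most VALU instructions may
// read two scalar values over the constant bus, so "v_add_f32_e64 v0, s1, s2"
// assembles there, while 64-bit shifts such as v_lshlrev_b64 are still
// limited to a single scalar operand.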
3836
3837 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3838 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3839
3840// Get regular operand indices in the same order as specified
3841// in the instruction (but append mandatory literals to the end).
3842static OperandIndices getSrcOperandIndices(unsigned Opcode,
3843 bool AddMandatoryLiterals = false) {
3844
3845 int16_t ImmIdx =
3846 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3847
3848 if (isVOPD(Opcode)) {
3849 int16_t ImmXIdx =
3850 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3851
3852 return {getNamedOperandIdx(Opcode, OpName::src0X),
3853 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3854 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3855 getNamedOperandIdx(Opcode, OpName::src0Y),
3856 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3857 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3858 ImmXIdx,
3859 ImmIdx};
3860 }
3861
3862 return {getNamedOperandIdx(Opcode, OpName::src0),
3863 getNamedOperandIdx(Opcode, OpName::src1),
3864 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3865}
3866
3867bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3868 const MCOperand &MO = Inst.getOperand(OpIdx);
3869 if (MO.isImm())
3870 return !isInlineConstant(Inst, OpIdx);
3871 if (MO.isReg()) {
3872 auto Reg = MO.getReg();
3873 if (!Reg)
3874 return false;
3875 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3876 auto PReg = mc2PseudoReg(Reg);
3877 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3878 }
3879 return true;
3880}
3881
3882// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3883// Writelane is special in that it can use SGPR and M0 (which would normally
3884// count as using the constant bus twice - but in this case it is allowed since
3885// the lane selector doesn't count as a use of the constant bus). However, it is
3886// still required to abide by the 1 SGPR rule.
3887static bool checkWriteLane(const MCInst &Inst) {
3888 const unsigned Opcode = Inst.getOpcode();
3889 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3890 return false;
3891 const MCOperand &LaneSelOp = Inst.getOperand(2);
3892 if (!LaneSelOp.isReg())
3893 return false;
3894 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3895 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3896}
3897
3898bool AMDGPUAsmParser::validateConstantBusLimitations(
3899 const MCInst &Inst, const OperandVector &Operands) {
3900 const unsigned Opcode = Inst.getOpcode();
3901 const MCInstrDesc &Desc = MII.get(Opcode);
3902 MCRegister LastSGPR;
3903 unsigned ConstantBusUseCount = 0;
3904 unsigned NumLiterals = 0;
3905 unsigned LiteralSize;
3906
3907 if (!(Desc.TSFlags &
3908 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3909 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3910 !isVOPD(Opcode))
3911 return true;
3912
3913 if (checkWriteLane(Inst))
3914 return true;
3915
3916 // Check special imm operands (used by madmk, etc)
3917 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3918 ++NumLiterals;
3919 LiteralSize = 4;
3920 }
3921
3922 SmallDenseSet<unsigned> SGPRsUsed;
3923 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3924 if (SGPRUsed != AMDGPU::NoRegister) {
3925 SGPRsUsed.insert(SGPRUsed);
3926 ++ConstantBusUseCount;
3927 }
3928
3929 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3930
3931 for (int OpIdx : OpIndices) {
3932 if (OpIdx == -1)
3933 continue;
3934
3935 const MCOperand &MO = Inst.getOperand(OpIdx);
3936 if (usesConstantBus(Inst, OpIdx)) {
3937 if (MO.isReg()) {
3938 LastSGPR = mc2PseudoReg(MO.getReg());
3939 // Pairs of registers with a partial intersection like these
3940 // s0, s[0:1]
3941 // flat_scratch_lo, flat_scratch
3942 // flat_scratch_lo, flat_scratch_hi
3943 // are theoretically valid but they are disabled anyway.
3944 // Note that this code mimics SIInstrInfo::verifyInstruction
3945 if (SGPRsUsed.insert(LastSGPR).second) {
3946 ++ConstantBusUseCount;
3947 }
3948 } else { // Expression or a literal
3949
3950 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3951 continue; // special operand like VINTERP attr_chan
3952
3953 // An instruction may use only one literal.
3954 // This has been validated on the previous step.
3955 // See validateVOPLiteral.
3956 // This literal may be used as more than one operand.
3957 // If all these operands are of the same size,
3958 // this literal counts as one scalar value.
3959 // Otherwise it counts as 2 scalar values.
3960 // See "GFX10 Shader Programming", section 3.6.2.3.
3961
3962 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3963 if (Size < 4)
3964 Size = 4;
3965
3966 if (NumLiterals == 0) {
3967 NumLiterals = 1;
3968 LiteralSize = Size;
3969 } else if (LiteralSize != Size) {
3970 NumLiterals = 2;
3971 }
3972 }
3973 }
3974 }
3975 ConstantBusUseCount += NumLiterals;
3976
3977 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3978 return true;
3979
3980 SMLoc LitLoc = getLitLoc(Operands);
3981 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3982 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3983 Error(Loc, "invalid operand (violates constant bus restrictions)");
3984 return false;
3985}
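// For example, with a constant bus limit of 1 (pre-GFX10),
// "v_add_f32_e64 v0, s0, s1" is rejected with the diagnostic above because it
// reads two different SGPRs, while "v_add_f32_e64 v0, s0, s0" is fine since a
// reused SGPR is only counted once.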
3986
3987std::optional<unsigned>
3988AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
3989
3990 const unsigned Opcode = Inst.getOpcode();
3991 if (!isVOPD(Opcode))
3992 return {};
3993
3994 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3995
3996 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3997 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3998 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3999 ? Opr.getReg()
4000 : MCRegister();
4001 };
4002
4003 // On GFX12+, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2
4004 // source-cache.
4005 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
4006 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
4007 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
4008 bool AllowSameVGPR = isGFX1250();
4009
4010 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
4011 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
4012 int I = getNamedOperandIdx(Opcode, OpName);
4013 const MCOperand &Op = Inst.getOperand(I);
4014 if (!Op.isImm())
4015 continue;
4016 int64_t Imm = Op.getImm();
4017 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
4018 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
4019 return (unsigned)I;
4020 }
4021
4022 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
4023 OpName::vsrc2Y, OpName::imm}) {
4024 int I = getNamedOperandIdx(Opcode, OpName);
4025 if (I == -1)
4026 continue;
4027 const MCOperand &Op = Inst.getOperand(I);
4028 if (Op.isImm())
4029 return (unsigned)I;
4030 }
4031 }
4032
4033 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4034 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4035 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
4036
4037 return InvalidCompOprIdx;
4038}
4039
4040bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
4041 const OperandVector &Operands) {
4042
4043 unsigned Opcode = Inst.getOpcode();
4044 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
4045
4046 if (AsVOPD3) {
4047 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4048 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
4049 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4050 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
4051 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
4052 }
4053 }
4054
4055 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4056 if (!InvalidCompOprIdx.has_value())
4057 return true;
4058
4059 auto CompOprIdx = *InvalidCompOprIdx;
4060 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4061 auto ParsedIdx =
4062 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
4063 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4064 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4065
4066 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4067 if (CompOprIdx == VOPD::Component::DST) {
4068 if (AsVOPD3)
4069 Error(Loc, "dst registers must be distinct");
4070 else
4071 Error(Loc, "one dst register must be even and the other odd");
4072 } else {
4073 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4074 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4075 " operands must use different VGPR banks");
4076 }
4077
4078 return false;
4079}
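// Illustrative VOPD example:
//   v_dual_mul_f32 v0, v1, v2 :: v_dual_add_f32 v3, v4, v5
// The two dst registers must be one even and one odd, and paired sources must
// come from different VGPR banks (roughly, a different VGPR number modulo 4).
// VOPD3 relaxes the bank rule but requires the dst registers to be distinct.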
4080
4081// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4082// potentially used as VOPD3 with the same operands.
4083bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4084 // First check if it fits VOPD
4085 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4086 if (!InvalidCompOprIdx.has_value())
4087 return false;
4088
4089 // Then if it fits VOPD3
4090 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4091 if (InvalidCompOprIdx.has_value()) {
4092 // If failed operand is dst it is better to show error about VOPD3
4093 // instruction as it has more capabilities and error message will be
4094 // more informative. If the dst is not legal for VOPD3, then it is not
4095 // legal for VOPD either.
4096 if (*InvalidCompOprIdx == VOPD::Component::DST)
4097 return true;
4098
4099 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4100 // with a conflict in the tied implicit src2 of fmac and no asm operand
4101 // to point to.
4102 return false;
4103 }
4104 return true;
4105}
4106
4107 // \returns true if a VOPD3 instruction can also be represented as a shorter
4108// VOPD encoding.
4109bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4110 const unsigned Opcode = Inst.getOpcode();
4111 const auto &II = getVOPDInstInfo(Opcode, &MII);
4112 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4113 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4114 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4115 return false;
4116
4117 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4118 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4119 // be parsed as VOPD which does not accept src2.
4120 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4121 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4122 return false;
4123
4124 // If any modifiers are set this cannot be VOPD.
4125 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4126 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4127 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4128 int I = getNamedOperandIdx(Opcode, OpName);
4129 if (I == -1)
4130 continue;
4131 if (Inst.getOperand(I).getImm())
4132 return false;
4133 }
4134
4135 return !tryVOPD3(Inst);
4136}
4137
4138 // VOPD3 has more relaxed register constraints than VOPD. We prefer the
4139 // shorter VOPD form but switch to VOPD3 otherwise.
4140bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4141 const unsigned Opcode = Inst.getOpcode();
4142 if (!isGFX1250() || !isVOPD(Opcode))
4143 return false;
4144
4145 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4146 return tryVOPD(Inst);
4147 return tryVOPD3(Inst);
4148}
4149
4150bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4151
4152 const unsigned Opc = Inst.getOpcode();
4153 const MCInstrDesc &Desc = MII.get(Opc);
4154
4155 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4156 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4157 assert(ClampIdx != -1);
4158 return Inst.getOperand(ClampIdx).getImm() == 0;
4159 }
4160
4161 return true;
4162}
4163
4164constexpr uint64_t MIMGFlags =
4165 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
4166
4167bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
4168 const SMLoc &IDLoc) {
4169
4170 const unsigned Opc = Inst.getOpcode();
4171 const MCInstrDesc &Desc = MII.get(Opc);
4172
4173 if ((Desc.TSFlags & MIMGFlags) == 0)
4174 return true;
4175
4176 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4177 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4178 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4179
4180 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4181 return true;
4182
4183 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4184 return true;
4185
4186 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
4187 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4188 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4189 if (DMask == 0)
4190 DMask = 1;
4191
4192 bool IsPackedD16 = false;
4193 unsigned DataSize =
4194 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4195 if (hasPackedD16()) {
4196 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4197 IsPackedD16 = D16Idx >= 0;
4198 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4199 DataSize = (DataSize + 1) / 2;
4200 }
4201
4202 if ((VDataSize / 4) == DataSize + TFESize)
4203 return true;
4204
4205 StringRef Modifiers;
4206 if (isGFX90A())
4207 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4208 else
4209 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4210
4211 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4212 return false;
4213}
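// Example of the size check above (GFX10 syntax):
//   image_load v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D
// needs four data VGPRs, one per dmask bit, plus one more if tfe is set;
// packed d16 halves the requirement.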
4214
4215bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
4216 const SMLoc &IDLoc) {
4217 const unsigned Opc = Inst.getOpcode();
4218 const MCInstrDesc &Desc = MII.get(Opc);
4219
4220 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4221 return true;
4222
4224
4225 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4226 AMDGPU::getMIMGBaseOpcodeInfo(AMDGPU::getMIMGInfo(Opc)->BaseOpcode);
4227 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4228 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4229 ? AMDGPU::OpName::srsrc
4230 : AMDGPU::OpName::rsrc;
4231 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4232 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4233 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4234
4235 assert(VAddr0Idx != -1);
4236 assert(SrsrcIdx != -1);
4237 assert(SrsrcIdx > VAddr0Idx);
4238
4239 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4240 if (BaseOpcode->BVH) {
4241 if (IsA16 == BaseOpcode->A16)
4242 return true;
4243 Error(IDLoc, "image address size does not match a16");
4244 return false;
4245 }
4246
4247 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4248 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4249 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4250 unsigned ActualAddrSize =
4251 IsNSA ? SrsrcIdx - VAddr0Idx
4252 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
4253
4254 unsigned ExpectedAddrSize =
4255 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4256
4257 if (IsNSA) {
4258 if (hasPartialNSAEncoding() &&
4259 ExpectedAddrSize >
4260 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
4261 int VAddrLastIdx = SrsrcIdx - 1;
4262 unsigned VAddrLastSize =
4263 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
4264
4265 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4266 }
4267 } else {
4268 if (ExpectedAddrSize > 12)
4269 ExpectedAddrSize = 16;
4270
4271 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4272 // This provides backward compatibility for assembly created
4273 // before 160b/192b/224b types were directly supported.
4274 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4275 return true;
4276 }
4277
4278 if (ActualAddrSize == ExpectedAddrSize)
4279 return true;
4280
4281 Error(IDLoc, "image address size does not match dim and a16");
4282 return false;
4283}
4284
4285bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4286
4287 const unsigned Opc = Inst.getOpcode();
4288 const MCInstrDesc &Desc = MII.get(Opc);
4289
4290 if ((Desc.TSFlags & MIMGFlags) == 0)
4291 return true;
4292 if (!Desc.mayLoad() || !Desc.mayStore())
4293 return true; // Not atomic
4294
4295 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4296 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4297
4298 // This is an incomplete check because image_atomic_cmpswap
4299 // may only use 0x3 and 0xf while other atomic operations
4300 // may use 0x1 and 0x3. However these limitations are
4301 // verified when we check that dmask matches dst size.
4302 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4303}
4304
4305bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4306
4307 const unsigned Opc = Inst.getOpcode();
4308 const MCInstrDesc &Desc = MII.get(Opc);
4309
4310 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4311 return true;
4312
4313 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4314 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4315
4316 // GATHER4 instructions use dmask in a different fashion compared to
4317 // other MIMG instructions. The only useful DMASK values are
4318 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4319 // (red,red,red,red) etc.) The ISA document doesn't mention
4320 // this.
4321 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4322}
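// Example: "image_gather4 ... dmask:0x1" gathers the red channel into all
// four result registers; any dmask other than 0x1/0x2/0x4/0x8 is rejected by
// the check above.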
4323
4324bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4325 const OperandVector &Operands) {
4326 if (!isGFX10Plus())
4327 return true;
4328
4329 const unsigned Opc = Inst.getOpcode();
4330 const MCInstrDesc &Desc = MII.get(Opc);
4331
4332 if ((Desc.TSFlags & MIMGFlags) == 0)
4333 return true;
4334
4335 // image_bvh_intersect_ray instructions do not have dim
4336 if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
4337 return true;
4338
4339 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4340 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4341 if (Op.isDim())
4342 return true;
4343 }
4344 return false;
4345}
4346
4347bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4348 const unsigned Opc = Inst.getOpcode();
4349 const MCInstrDesc &Desc = MII.get(Opc);
4350
4351 if ((Desc.TSFlags & MIMGFlags) == 0)
4352 return true;
4353
4355 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4356 AMDGPU::getMIMGBaseOpcodeInfo(AMDGPU::getMIMGInfo(Opc)->BaseOpcode);
4357
4358 if (!BaseOpcode->MSAA)
4359 return true;
4360
4361 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4362 assert(DimIdx != -1);
4363
4364 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4365 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4366
4367 return DimInfo->MSAA;
4368}
4369
4370static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4371{
4372 switch (Opcode) {
4373 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4374 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4375 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4376 return true;
4377 default:
4378 return false;
4379 }
4380}
4381
4382// movrels* opcodes should only allow VGPRs as src0.
4383// This is specified in .td description for vop1/vop3,
4384// but sdwa is handled differently. See isSDWAOperand.
4385bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4386 const OperandVector &Operands) {
4387
4388 const unsigned Opc = Inst.getOpcode();
4389 const MCInstrDesc &Desc = MII.get(Opc);
4390
4391 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4392 return true;
4393
4394 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4395 assert(Src0Idx != -1);
4396
4397 SMLoc ErrLoc;
4398 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4399 if (Src0.isReg()) {
4400 auto Reg = mc2PseudoReg(Src0.getReg());
4401 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4402 if (!isSGPR(Reg, TRI))
4403 return true;
4404 ErrLoc = getRegLoc(Reg, Operands);
4405 } else {
4406 ErrLoc = getConstLoc(Operands);
4407 }
4408
4409 Error(ErrLoc, "source operand must be a VGPR");
4410 return false;
4411}
4412
4413bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4414 const OperandVector &Operands) {
4415
4416 const unsigned Opc = Inst.getOpcode();
4417
4418 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4419 return true;
4420
4421 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4422 assert(Src0Idx != -1);
4423
4424 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4425 if (!Src0.isReg())
4426 return true;
4427
4428 auto Reg = mc2PseudoReg(Src0.getReg());
4429 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4430 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4431 Error(getRegLoc(Reg, Operands),
4432 "source operand must be either a VGPR or an inline constant");
4433 return false;
4434 }
4435
4436 return true;
4437}
4438
4439bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4440 const OperandVector &Operands) {
4441 unsigned Opcode = Inst.getOpcode();
4442 const MCInstrDesc &Desc = MII.get(Opcode);
4443
4444 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4445 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4446 return true;
4447
4448 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4449 if (Src2Idx == -1)
4450 return true;
4451
4452 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4453 Error(getConstLoc(Operands),
4454 "inline constants are not allowed for this operand");
4455 return false;
4456 }
4457
4458 return true;
4459}
4460
4461bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4462 const OperandVector &Operands) {
4463 const unsigned Opc = Inst.getOpcode();
4464 const MCInstrDesc &Desc = MII.get(Opc);
4465
4466 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4467 return true;
4468
4469 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4470 if (BlgpIdx != -1) {
4471 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4472 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4473
4474 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4475 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4476
4477 // Validate the correct register size was used for the floating point
4478 // format operands
4479
4480 bool Success = true;
4481 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4482 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4483 Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()),
4484 Operands),
4485 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4486 Success = false;
4487 }
4488
4489 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4490 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4491 Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src1Idx).getReg()),
4492 Operands),
4493 "wrong register tuple size for blgp value " + Twine(BLGP));
4494 Success = false;
4495 }
4496
4497 return Success;
4498 }
4499 }
4500
4501 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4502 if (Src2Idx == -1)
4503 return true;
4504
4505 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4506 if (!Src2.isReg())
4507 return true;
4508
4509 MCRegister Src2Reg = Src2.getReg();
4510 MCRegister DstReg = Inst.getOperand(0).getReg();
4511 if (Src2Reg == DstReg)
4512 return true;
4513
4514 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4515 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4516 return true;
4517
4518 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4519 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4520 "source 2 operand must not partially overlap with dst");
4521 return false;
4522 }
4523
4524 return true;
4525}
4526
4527bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4528 switch (Inst.getOpcode()) {
4529 default:
4530 return true;
4531 case V_DIV_SCALE_F32_gfx6_gfx7:
4532 case V_DIV_SCALE_F32_vi:
4533 case V_DIV_SCALE_F32_gfx10:
4534 case V_DIV_SCALE_F64_gfx6_gfx7:
4535 case V_DIV_SCALE_F64_vi:
4536 case V_DIV_SCALE_F64_gfx10:
4537 break;
4538 }
4539
4540 // TODO: Check that src0 = src1 or src2.
4541
4542 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4543 AMDGPU::OpName::src2_modifiers,
4544 AMDGPU::OpName::src2_modifiers}) {
4545 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4546 .getImm() &
4547 SISrcMods::ABS) {
4548 return false;
4549 }
4550 }
4551
4552 return true;
4553}
4554
4555bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4556
4557 const unsigned Opc = Inst.getOpcode();
4558 const MCInstrDesc &Desc = MII.get(Opc);
4559
4560 if ((Desc.TSFlags & MIMGFlags) == 0)
4561 return true;
4562
4563 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4564 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4565 if (isCI() || isSI())
4566 return false;
4567 }
4568
4569 return true;
4570}
4571
4572bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4573 const unsigned Opc = Inst.getOpcode();
4574 const MCInstrDesc &Desc = MII.get(Opc);
4575
4576 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4577 return true;
4578
4579 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4580
4581 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4582}
4583
4584static bool IsRevOpcode(const unsigned Opcode)
4585{
4586 switch (Opcode) {
4587 case AMDGPU::V_SUBREV_F32_e32:
4588 case AMDGPU::V_SUBREV_F32_e64:
4589 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4590 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4591 case AMDGPU::V_SUBREV_F32_e32_vi:
4592 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4593 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4594 case AMDGPU::V_SUBREV_F32_e64_vi:
4595
4596 case AMDGPU::V_SUBREV_CO_U32_e32:
4597 case AMDGPU::V_SUBREV_CO_U32_e64:
4598 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4599 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4600
4601 case AMDGPU::V_SUBBREV_U32_e32:
4602 case AMDGPU::V_SUBBREV_U32_e64:
4603 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4604 case AMDGPU::V_SUBBREV_U32_e32_vi:
4605 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4606 case AMDGPU::V_SUBBREV_U32_e64_vi:
4607
4608 case AMDGPU::V_SUBREV_U32_e32:
4609 case AMDGPU::V_SUBREV_U32_e64:
4610 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4611 case AMDGPU::V_SUBREV_U32_e32_vi:
4612 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4613 case AMDGPU::V_SUBREV_U32_e64_vi:
4614
4615 case AMDGPU::V_SUBREV_F16_e32:
4616 case AMDGPU::V_SUBREV_F16_e64:
4617 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4618 case AMDGPU::V_SUBREV_F16_e32_vi:
4619 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4620 case AMDGPU::V_SUBREV_F16_e64_vi:
4621
4622 case AMDGPU::V_SUBREV_U16_e32:
4623 case AMDGPU::V_SUBREV_U16_e64:
4624 case AMDGPU::V_SUBREV_U16_e32_vi:
4625 case AMDGPU::V_SUBREV_U16_e64_vi:
4626
4627 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4628 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4629 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4630
4631 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4632 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4633
4634 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4635 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4636
4637 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4638 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4639
4640 case AMDGPU::V_LSHRREV_B32_e32:
4641 case AMDGPU::V_LSHRREV_B32_e64:
4642 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4643 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4644 case AMDGPU::V_LSHRREV_B32_e32_vi:
4645 case AMDGPU::V_LSHRREV_B32_e64_vi:
4646 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4647 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4648
4649 case AMDGPU::V_ASHRREV_I32_e32:
4650 case AMDGPU::V_ASHRREV_I32_e64:
4651 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4652 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4653 case AMDGPU::V_ASHRREV_I32_e32_vi:
4654 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4655 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4656 case AMDGPU::V_ASHRREV_I32_e64_vi:
4657
4658 case AMDGPU::V_LSHLREV_B32_e32:
4659 case AMDGPU::V_LSHLREV_B32_e64:
4660 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4661 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4662 case AMDGPU::V_LSHLREV_B32_e32_vi:
4663 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4664 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4665 case AMDGPU::V_LSHLREV_B32_e64_vi:
4666
4667 case AMDGPU::V_LSHLREV_B16_e32:
4668 case AMDGPU::V_LSHLREV_B16_e64:
4669 case AMDGPU::V_LSHLREV_B16_e32_vi:
4670 case AMDGPU::V_LSHLREV_B16_e64_vi:
4671 case AMDGPU::V_LSHLREV_B16_gfx10:
4672
4673 case AMDGPU::V_LSHRREV_B16_e32:
4674 case AMDGPU::V_LSHRREV_B16_e64:
4675 case AMDGPU::V_LSHRREV_B16_e32_vi:
4676 case AMDGPU::V_LSHRREV_B16_e64_vi:
4677 case AMDGPU::V_LSHRREV_B16_gfx10:
4678
4679 case AMDGPU::V_ASHRREV_I16_e32:
4680 case AMDGPU::V_ASHRREV_I16_e64:
4681 case AMDGPU::V_ASHRREV_I16_e32_vi:
4682 case AMDGPU::V_ASHRREV_I16_e64_vi:
4683 case AMDGPU::V_ASHRREV_I16_gfx10:
4684
4685 case AMDGPU::V_LSHLREV_B64_e64:
4686 case AMDGPU::V_LSHLREV_B64_gfx10:
4687 case AMDGPU::V_LSHLREV_B64_vi:
4688
4689 case AMDGPU::V_LSHRREV_B64_e64:
4690 case AMDGPU::V_LSHRREV_B64_gfx10:
4691 case AMDGPU::V_LSHRREV_B64_vi:
4692
4693 case AMDGPU::V_ASHRREV_I64_e64:
4694 case AMDGPU::V_ASHRREV_I64_gfx10:
4695 case AMDGPU::V_ASHRREV_I64_vi:
4696
4697 case AMDGPU::V_PK_LSHLREV_B16:
4698 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4699 case AMDGPU::V_PK_LSHLREV_B16_vi:
4700
4701 case AMDGPU::V_PK_LSHRREV_B16:
4702 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4703 case AMDGPU::V_PK_LSHRREV_B16_vi:
4704 case AMDGPU::V_PK_ASHRREV_I16:
4705 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4706 case AMDGPU::V_PK_ASHRREV_I16_vi:
4707 return true;
4708 default:
4709 return false;
4710 }
4711}
4712
4713std::optional<StringRef>
4714AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4715
4716 using namespace SIInstrFlags;
4717 const unsigned Opcode = Inst.getOpcode();
4718 const MCInstrDesc &Desc = MII.get(Opcode);
4719
4720 // lds_direct register is defined so that it can be used
4721 // with 9-bit operands only. Ignore encodings which do not accept these.
4722 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4723 if ((Desc.TSFlags & Enc) == 0)
4724 return std::nullopt;
4725
4726 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4727 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4728 if (SrcIdx == -1)
4729 break;
4730 const auto &Src = Inst.getOperand(SrcIdx);
4731 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4732
4733 if (isGFX90A() || isGFX11Plus())
4734 return StringRef("lds_direct is not supported on this GPU");
4735
4736 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4737 return StringRef("lds_direct cannot be used with this instruction");
4738
4739 if (SrcName != OpName::src0)
4740 return StringRef("lds_direct may be used as src0 only");
4741 }
4742 }
4743
4744 return std::nullopt;
4745}
4746
4747SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4748 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4749 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4750 if (Op.isFlatOffset())
4751 return Op.getStartLoc();
4752 }
4753 return getLoc();
4754}
4755
4756bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4757 const OperandVector &Operands) {
4758 auto Opcode = Inst.getOpcode();
4759 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4760 if (OpNum == -1)
4761 return true;
4762
4763 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4764 if ((TSFlags & SIInstrFlags::FLAT))
4765 return validateFlatOffset(Inst, Operands);
4766
4767 if ((TSFlags & SIInstrFlags::SMRD))
4768 return validateSMEMOffset(Inst, Operands);
4769
4770 const auto &Op = Inst.getOperand(OpNum);
4771 if (isGFX12Plus() &&
4772 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4773 const unsigned OffsetSize = 24;
4774 if (!isIntN(OffsetSize, Op.getImm())) {
4775 Error(getFlatOffsetLoc(Operands),
4776 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4777 return false;
4778 }
4779 } else {
4780 const unsigned OffsetSize = 16;
4781 if (!isUIntN(OffsetSize, Op.getImm())) {
4782 Error(getFlatOffsetLoc(Operands),
4783 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4784 return false;
4785 }
4786 }
4787 return true;
4788}
4789
4790bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4791 const OperandVector &Operands) {
4792 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4793 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4794 return true;
4795
4796 auto Opcode = Inst.getOpcode();
4797 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4798 assert(OpNum != -1);
4799
4800 const auto &Op = Inst.getOperand(OpNum);
4801 if (!hasFlatOffsets() && Op.getImm() != 0) {
4802 Error(getFlatOffsetLoc(Operands),
4803 "flat offset modifier is not supported on this GPU");
4804 return false;
4805 }
4806
4807 // For pre-GFX12 FLAT instructions the offset must be positive;
4808 // MSB is ignored and forced to zero.
4809 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4810 bool AllowNegative =
4811 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4812 isGFX12Plus();
4813 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4814 Error(getFlatOffsetLoc(Operands),
4815 Twine("expected a ") +
4816 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4817 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4818 return false;
4819 }
4820
4821 return true;
4822}
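// Example: "flat_load_dword v0, v[1:2] offset:8" is rejected above on targets
// without flat instruction offsets (e.g. GFX8), and on newer targets the
// immediate must fit in the target-specific width reported by
// getNumFlatOffsetBits().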
4823
4824SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4825 // Start with second operand because SMEM Offset cannot be dst or src0.
4826 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4827 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4828 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4829 return Op.getStartLoc();
4830 }
4831 return getLoc();
4832}
4833
4834bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4835 const OperandVector &Operands) {
4836 if (isCI() || isSI())
4837 return true;
4838
4839 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4840 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4841 return true;
4842
4843 auto Opcode = Inst.getOpcode();
4844 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4845 if (OpNum == -1)
4846 return true;
4847
4848 const auto &Op = Inst.getOperand(OpNum);
4849 if (!Op.isImm())
4850 return true;
4851
4852 uint64_t Offset = Op.getImm();
4853 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4854 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4855 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4856 return true;
4857
4858 Error(getSMEMOffsetLoc(Operands),
4859 isGFX12Plus() ? "expected a 24-bit signed offset"
4860 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4861 : "expected a 21-bit signed offset");
4862
4863 return false;
4864}
4865
4866bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4867 unsigned Opcode = Inst.getOpcode();
4868 const MCInstrDesc &Desc = MII.get(Opcode);
4869 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4870 return true;
4871
4872 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4873 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4874
4875 const int OpIndices[] = { Src0Idx, Src1Idx };
4876
4877 unsigned NumExprs = 0;
4878 unsigned NumLiterals = 0;
4879 uint64_t LiteralValue;
4880
4881 for (int OpIdx : OpIndices) {
4882 if (OpIdx == -1) break;
4883
4884 const MCOperand &MO = Inst.getOperand(OpIdx);
4885 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4886 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4887 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4888 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4889 if (NumLiterals == 0 || LiteralValue != Value) {
4890 LiteralValue = Value;
4891 ++NumLiterals;
4892 }
4893 } else if (MO.isExpr()) {
4894 ++NumExprs;
4895 }
4896 }
4897 }
4898
4899 return NumLiterals + NumExprs <= 1;
4900}
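// Example: "s_and_b32 s0, 0x12345, 0x12345" is accepted (one unique literal
// used twice), while "s_and_b32 s0, 0x12345, 0x54321" is not, since SOP2/SOPC
// instructions can encode only a single 32-bit literal.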
4901
4902bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4903 const unsigned Opc = Inst.getOpcode();
4904 if (isPermlane16(Opc)) {
4905 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4906 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4907
4908 if (OpSel & ~3)
4909 return false;
4910 }
4911
4912 uint64_t TSFlags = MII.get(Opc).TSFlags;
4913
4914 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4915 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4916 if (OpSelIdx != -1) {
4917 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4918 return false;
4919 }
4920 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4921 if (OpSelHiIdx != -1) {
4922 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4923 return false;
4924 }
4925 }
4926
4927 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4928 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4929 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4930 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4931 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4932 if (OpSel & 3)
4933 return false;
4934 }
4935
4936 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4937 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4938 // the first SGPR and use it for both the low and high operations.
4939 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4940 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4941 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4942 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4943 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4944
4945 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4946 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4947 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4948 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4949
4950 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4951
4952 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
4953 unsigned Mask = 1U << Index;
4954 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
4955 };
4956
4957 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
4958 !VerifyOneSGPR(/*Index=*/0))
4959 return false;
4960 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
4961 !VerifyOneSGPR(/*Index=*/1))
4962 return false;
4963
4964 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4965 if (Src2Idx != -1) {
4966 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4967 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
4968 !VerifyOneSGPR(/*Index=*/2))
4969 return false;
4970 }
4971 }
4972
4973 return true;
4974}
4975
4976bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4977 if (!hasTrue16Insts())
4978 return true;
4979 const MCRegisterInfo *MRI = getMRI();
4980 const unsigned Opc = Inst.getOpcode();
4981 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4982 if (OpSelIdx == -1)
4983 return true;
4984 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
4985 // If the value is 0 we could have a default OpSel Operand, so conservatively
4986 // allow it.
4987 if (OpSelOpValue == 0)
4988 return true;
4989 unsigned OpCount = 0;
4990 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4991 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4992 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
4993 if (OpIdx == -1)
4994 continue;
4995 const MCOperand &Op = Inst.getOperand(OpIdx);
4996 if (Op.isReg() &&
4997 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
4998 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
4999 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
5000 if (OpSelOpIsHi != VGPRSuffixIsHi)
5001 return false;
5002 }
5003 ++OpCount;
5004 }
5005
5006 return true;
5007}
5008
5009bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
5010 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5011
5012 const unsigned Opc = Inst.getOpcode();
5013 uint64_t TSFlags = MII.get(Opc).TSFlags;
5014
5015 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
5016 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
5017 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
5018 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
5019 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
5020 !(TSFlags & SIInstrFlags::IsSWMMAC))
5021 return true;
5022
5023 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
5024 if (NegIdx == -1)
5025 return true;
5026
5027 unsigned Neg = Inst.getOperand(NegIdx).getImm();
5028
5029 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
5030 // allowed on some src operands and not on others.
5031 // Conveniently, such instructions don't have a src_modifiers operand for the
5032 // src operands that don't allow neg, because those also don't allow opsel.
5033
5034 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5035 AMDGPU::OpName::src1_modifiers,
5036 AMDGPU::OpName::src2_modifiers};
5037
5038 for (unsigned i = 0; i < 3; ++i) {
5039 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
5040 if (Neg & (1 << i))
5041 return false;
5042 }
5043 }
5044
5045 return true;
5046}
5047
5048bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5049 const OperandVector &Operands) {
5050 const unsigned Opc = Inst.getOpcode();
5051 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5052 if (DppCtrlIdx >= 0) {
5053 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5054
5055 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5056 AMDGPU::isDPALU_DPP(MII.get(Opc), getSTI())) {
5057 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5058 // only on GFX12.
5059 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5060 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5061 : "DP ALU dpp only supports row_newbcast");
5062 return false;
5063 }
5064 }
5065
5066 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5067 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5068
5069 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5070 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5071 if (Src1Idx >= 0) {
5072 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5073 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5074 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5075 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
5076 SMLoc S = getRegLoc(Reg, Operands);
5077 Error(S, "invalid operand for instruction");
5078 return false;
5079 }
5080 if (Src1.isImm()) {
5081 Error(getInstLoc(Operands),
5082 "src1 immediate operand invalid for instruction");
5083 return false;
5084 }
5085 }
5086 }
5087
5088 return true;
5089}
5090
5091// Check if VCC register matches wavefront size
5092bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5093 auto FB = getFeatureBits();
5094 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
5095 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
5096}
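// Example: wave64 code writes the carry operand as "vcc", e.g.
// "v_add_co_ci_u32_e32 v0, vcc, v1, v2, vcc", while wave32 code uses
// "vcc_lo"; the mismatched spelling is rejected via this check.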
5097
5098// Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
5099bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5100 const OperandVector &Operands) {
5101 unsigned Opcode = Inst.getOpcode();
5102 const MCInstrDesc &Desc = MII.get(Opcode);
5103 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5104 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5105 !HasMandatoryLiteral && !isVOPD(Opcode))
5106 return true;
5107
5108 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5109
5110 unsigned NumExprs = 0;
5111 unsigned NumLiterals = 0;
5112 uint64_t LiteralValue;
5113
5114 for (int OpIdx : OpIndices) {
5115 if (OpIdx == -1)
5116 continue;
5117
5118 const MCOperand &MO = Inst.getOperand(OpIdx);
5119 if (!MO.isImm() && !MO.isExpr())
5120 continue;
5121 if (!isSISrcOperand(Desc, OpIdx))
5122 continue;
5123
5124 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
5125 uint64_t Value = static_cast<uint64_t>(MO.getImm());
5126 bool IsForcedFP64 =
5127 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5128 (Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_REG_IMM_FP64 &&
5129 HasMandatoryLiteral);
5130 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5131 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5132 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
5133
5134 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5135 !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
5136 Error(getLitLoc(Operands), "invalid operand for instruction");
5137 return false;
5138 }
5139
5140 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5141 Value = Hi_32(Value);
5142
5143 if (NumLiterals == 0 || LiteralValue != Value) {
5144 LiteralValue = Value;
5145 ++NumLiterals;
5146 }
5147 } else if (MO.isExpr()) {
5148 ++NumExprs;
5149 }
5150 }
5151 NumLiterals += NumExprs;
5152
5153 if (!NumLiterals)
5154 return true;
5155
5156 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
5157 Error(getLitLoc(Operands), "literal operands are not supported");
5158 return false;
5159 }
5160
5161 if (NumLiterals > 1) {
5162 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
5163 return false;
5164 }
5165
5166 return true;
5167}
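// Example: "v_fma_f32 v0, v1, v2, 0x40490fdb" requires FeatureVOP3Literal
// (GFX10+); on earlier targets a VOP3 source must be a register or an inline
// constant. Two different literal values in one instruction are always
// rejected here.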
5168
5169// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5170static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5171 const MCRegisterInfo *MRI) {
5172 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5173 if (OpIdx < 0)
5174 return -1;
5175
5176 const MCOperand &Op = Inst.getOperand(OpIdx);
5177 if (!Op.isReg())
5178 return -1;
5179
5180 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5181 auto Reg = Sub ? Sub : Op.getReg();
5182 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5183 return AGPR32.contains(Reg) ? 1 : 0;
5184}
5185
5186bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5187 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5188 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5189 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5190 SIInstrFlags::DS)) == 0)
5191 return true;
5192
5193 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5194 ? AMDGPU::OpName::data0
5195 : AMDGPU::OpName::vdata;
5196
5197 const MCRegisterInfo *MRI = getMRI();
5198 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5199 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5200
5201 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5202 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5203 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5204 return false;
5205 }
5206
5207 auto FB = getFeatureBits();
5208 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5209 if (DataAreg < 0 || DstAreg < 0)
5210 return true;
5211 return DstAreg == DataAreg;
5212 }
5213
5214 return DstAreg < 1 && DataAreg < 1;
5215}
5216
5217bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5218 auto FB = getFeatureBits();
5219 if (!FB[AMDGPU::FeatureGFX90AInsts] && !FB[AMDGPU::FeatureGFX1250Insts])
5220 return true;
5221
5222 unsigned Opc = Inst.getOpcode();
5223 const MCRegisterInfo *MRI = getMRI();
5224 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950 that allows an
5225 // unaligned VGPR. All others only allow even-aligned VGPRs.
5226 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5227 return true;
5228
5229 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5230 switch (Opc) {
5231 default:
5232 break;
5233 case AMDGPU::DS_LOAD_TR6_B96:
5234 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5235 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250 that
5236 // allows an unaligned VGPR. All others only allow even-aligned VGPRs.
5237 return true;
5238 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5239 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5240 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250 that
5241 // allows an unaligned VGPR for vdst, but other operands still only allow
5242 // even-aligned VGPRs.
5243 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5244 if (VAddrIdx != -1) {
5245 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5246 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5247 if ((Sub - AMDGPU::VGPR0) & 1)
5248 return false;
5249 }
5250 return true;
5251 }
5252 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5253 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5254 return true;
5255 }
5256 }
5257
5258 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5259 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5260 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5261 const MCOperand &Op = Inst.getOperand(I);
5262 if (!Op.isReg())
5263 continue;
5264
5265 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5266 if (!Sub)
5267 continue;
5268
5269 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5270 return false;
5271 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5272 return false;
5273 }
5274
5275 return true;
5276}
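// Example: on gfx90a "global_load_dwordx2 v[2:3], v[0:1], off" assembles,
// while "global_load_dwordx2 v[3:4], v[0:1], off" does not, because wide
// VGPR/AGPR tuples must start at an even register there.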
5277
5278SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5279 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5280 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5281 if (Op.isBLGP())
5282 return Op.getStartLoc();
5283 }
5284 return SMLoc();
5285}
5286
5287bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5288 const OperandVector &Operands) {
5289 unsigned Opc = Inst.getOpcode();
5290 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5291 if (BlgpIdx == -1)
5292 return true;
5293 SMLoc BLGPLoc = getBLGPLoc(Operands);
5294 if (!BLGPLoc.isValid())
5295 return true;
5296 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5297 auto FB = getFeatureBits();
5298 bool UsesNeg = false;
5299 if (FB[AMDGPU::FeatureGFX940Insts]) {
5300 switch (Opc) {
5301 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5302 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5303 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5304 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5305 UsesNeg = true;
5306 }
5307 }
5308
5309 if (IsNeg == UsesNeg)
5310 return true;
5311
5312 Error(BLGPLoc,
5313 UsesNeg ? "invalid modifier: blgp is not supported"
5314 : "invalid modifier: neg is not supported");
5315
5316 return false;
5317}
5318
5319bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5320 const OperandVector &Operands) {
5321 if (!isGFX11Plus())
5322 return true;
5323
5324 unsigned Opc = Inst.getOpcode();
5325 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5326 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5327 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5328 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5329 return true;
5330
5331 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5332 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5333 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5334 if (Reg == AMDGPU::SGPR_NULL)
5335 return true;
5336
5337 SMLoc RegLoc = getRegLoc(Reg, Operands);
5338 Error(RegLoc, "src0 must be null");
5339 return false;
5340}
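// Example: on GFX11 the split waitcnt forms take a null dst, e.g.
// "s_waitcnt_vscnt null, 0x0"; naming a real SGPR there is diagnosed above.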
5341
5342bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5343 const OperandVector &Operands) {
5344 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5345 if ((TSFlags & SIInstrFlags::DS) == 0)
5346 return true;
5347 if (TSFlags & SIInstrFlags::GWS)
5348 return validateGWS(Inst, Operands);
5349 // Only validate GDS for non-GWS instructions.
5350 if (hasGDS())
5351 return true;
5352 int GDSIdx =
5353 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5354 if (GDSIdx < 0)
5355 return true;
5356 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5357 if (GDS) {
5358 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5359 Error(S, "gds modifier is not supported on this GPU");
5360 return false;
5361 }
5362 return true;
5363}
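// Example: "ds_write_b32 v0, v1 gds" only assembles on targets that still
// have GDS; elsewhere the gds modifier is diagnosed above.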
5364
5365// gfx90a has an undocumented limitation:
5366// DS_GWS opcodes must use even aligned registers.
5367bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5368 const OperandVector &Operands) {
5369 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5370 return true;
5371
5372 int Opc = Inst.getOpcode();
5373 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5374 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5375 return true;
5376
5377 const MCRegisterInfo *MRI = getMRI();
5378 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5379 int Data0Pos =
5380 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5381 assert(Data0Pos != -1);
5382 auto Reg = Inst.getOperand(Data0Pos).getReg();
5383 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5384 if (RegIdx & 1) {
5385 SMLoc RegLoc = getRegLoc(Reg, Operands);
5386 Error(RegLoc, "vgpr must be even aligned");
5387 return false;
5388 }
5389
5390 return true;
5391}
5392
5393bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5394 const OperandVector &Operands,
5395 const SMLoc &IDLoc) {
5396 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5397 AMDGPU::OpName::cpol);
5398 if (CPolPos == -1)
5399 return true;
5400
5401 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5402
5403 if (!isGFX1250()) {
5404 if (CPol & CPol::SCAL) {
5405 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5406 StringRef CStr(S.getPointer());
5407 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5408 Error(S, "scale_offset is not supported on this GPU");
5409 }
5410 if (CPol & CPol::NV) {
5411 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5412 StringRef CStr(S.getPointer());
5413 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5414 Error(S, "nv is not supported on this GPU");
5415 }
5416 }
5417
5418 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5419 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5420 StringRef CStr(S.getPointer());
5421 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5422 Error(S, "scale_offset is not supported for this instruction");
5423 }
5424
5425 if (isGFX12Plus())
5426 return validateTHAndScopeBits(Inst, Operands, CPol);
5427
5428 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5429 if (TSFlags & SIInstrFlags::SMRD) {
5430 if (CPol && (isSI() || isCI())) {
5431 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5432 Error(S, "cache policy is not supported for SMRD instructions");
5433 return false;
5434 }
5435 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5436 Error(IDLoc, "invalid cache policy for SMEM instruction");
5437 return false;
5438 }
5439 }
5440
5441 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5442 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5443 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5444 SIInstrFlags::FLAT;
5445 if (!(TSFlags & AllowSCCModifier)) {
5446 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5447 StringRef CStr(S.getPointer());
5448 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5449 Error(S,
5450 "scc modifier is not supported for this instruction on this GPU");
5451 return false;
5452 }
5453 }
5454
5455 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
5456 return true;
5457
5458 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5459 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5460 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5461 : "instruction must use glc");
5462 return false;
5463 }
5464 } else {
5465 if (CPol & CPol::GLC) {
5466 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5467 StringRef CStr(S.getPointer());
5468 S = SMLoc::getFromPointer(
5469 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5470 Error(S, isGFX940() ? "instruction must not use sc0"
5471 : "instruction must not use glc");
5472 return false;
5473 }
5474 }
5475
5476 return true;
5477}
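// Editorial example (illustrative, not from the source): on pre-GFX12 targets
// the checks above require a returning atomic to carry glc (sc0 on gfx940) and
// reject glc on the non-returning form, e.g.
//   flat_atomic_swap v0, v[1:2], v3 glc   ; returning form, glc required
//   flat_atomic_swap v[1:2], v3           ; non-returning form, glc rejected
// Operand layout is approximate; only the glc/sc0 requirement is taken from
// validateCoherencyBits.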
5478
5479bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5480 const OperandVector &Operands,
5481 const unsigned CPol) {
5482 const unsigned TH = CPol & AMDGPU::CPol::TH;
5483 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5484
5485 const unsigned Opcode = Inst.getOpcode();
5486 const MCInstrDesc &TID = MII.get(Opcode);
5487
5488 auto PrintError = [&](StringRef Msg) {
5489 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5490 Error(S, Msg);
5491 return false;
5492 };
5493
5494 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5497 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5498
5499 if (TH == 0)
5500 return true;
5501
5502 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5503 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5504 (TH == AMDGPU::CPol::TH_NT_HT)))
5505 return PrintError("invalid th value for SMEM instruction");
5506
5507 if (TH == AMDGPU::CPol::TH_BYPASS) {
5508 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5510 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5512 return PrintError("scope and th combination is not valid");
5513 }
5514
5515 unsigned THType = AMDGPU::getTemporalHintType(TID);
5516 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5517 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5518 return PrintError("invalid th value for atomic instructions");
5519 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5520 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5521 return PrintError("invalid th value for store instructions");
5522 } else {
5523 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5524 return PrintError("invalid th value for load instructions");
5525 }
5526
5527 return true;
5528}
5529
5530bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5531 const OperandVector &Operands) {
5532 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5533 if (Desc.mayStore() &&
5534 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5535 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5536 if (Loc != getInstLoc(Operands)) {
5537 Error(Loc, "TFE modifier has no meaning for store instructions");
5538 return false;
5539 }
5540 }
5541
5542 return true;
5543}
5544
5545bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5546 const OperandVector &Operands) {
5547 unsigned Opc = Inst.getOpcode();
5548 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5549 const MCInstrDesc &Desc = MII.get(Opc);
5550
5551 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
5552 int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
5553 if (FmtIdx == -1)
5554 return true;
5555 unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
5556 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5557 unsigned RegSize =
5558 TRI->getRegClass(Desc.operands()[SrcIdx].RegClass).getSizeInBits();
5559
5561 return true;
5562
5563 static const char *FmtNames[] = {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
5564 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
5565 "MATRIX_FMT_FP4"};
5566
5567 Error(getRegLoc(mc2PseudoReg(Inst.getOperand(SrcIdx).getReg()), Operands),
5568 "wrong register tuple size for " + Twine(FmtNames[Fmt]));
5569 return false;
5570 };
5571
5572 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5573 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5574}
5575
5576bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5577 const SMLoc &IDLoc,
5578 const OperandVector &Operands) {
5579 if (auto ErrMsg = validateLdsDirect(Inst)) {
5580 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5581 return false;
5582 }
5583 if (!validateTrue16OpSel(Inst)) {
5584 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5585 "op_sel operand conflicts with 16-bit operand suffix");
5586 return false;
5587 }
5588 if (!validateSOPLiteral(Inst)) {
5589 Error(getLitLoc(Operands),
5590 "only one unique literal operand is allowed");
5591 return false;
5592 }
5593 if (!validateVOPLiteral(Inst, Operands)) {
5594 return false;
5595 }
5596 if (!validateConstantBusLimitations(Inst, Operands)) {
5597 return false;
5598 }
5599 if (!validateVOPD(Inst, Operands)) {
5600 return false;
5601 }
5602 if (!validateIntClampSupported(Inst)) {
5603 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5604 "integer clamping is not supported on this GPU");
5605 return false;
5606 }
5607 if (!validateOpSel(Inst)) {
5608 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5609 "invalid op_sel operand");
5610 return false;
5611 }
5612 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5613 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5614 "invalid neg_lo operand");
5615 return false;
5616 }
5617 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5618 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5619 "invalid neg_hi operand");
5620 return false;
5621 }
5622 if (!validateDPP(Inst, Operands)) {
5623 return false;
5624 }
5625 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5626 if (!validateMIMGD16(Inst)) {
5627 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5628 "d16 modifier is not supported on this GPU");
5629 return false;
5630 }
5631 if (!validateMIMGDim(Inst, Operands)) {
5632 Error(IDLoc, "missing dim operand");
5633 return false;
5634 }
5635 if (!validateTensorR128(Inst)) {
5636 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5637 "instruction must set modifier r128=0");
5638 return false;
5639 }
5640 if (!validateMIMGMSAA(Inst)) {
5641 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5642 "invalid dim; must be MSAA type");
5643 return false;
5644 }
5645 if (!validateMIMGDataSize(Inst, IDLoc)) {
5646 return false;
5647 }
5648 if (!validateMIMGAddrSize(Inst, IDLoc))
5649 return false;
5650 if (!validateMIMGAtomicDMask(Inst)) {
5651 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5652 "invalid atomic image dmask");
5653 return false;
5654 }
5655 if (!validateMIMGGatherDMask(Inst)) {
5656 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5657 "invalid image_gather dmask: only one bit must be set");
5658 return false;
5659 }
5660 if (!validateMovrels(Inst, Operands)) {
5661 return false;
5662 }
5663 if (!validateOffset(Inst, Operands)) {
5664 return false;
5665 }
5666 if (!validateMAIAccWrite(Inst, Operands)) {
5667 return false;
5668 }
5669 if (!validateMAISrc2(Inst, Operands)) {
5670 return false;
5671 }
5672 if (!validateMFMA(Inst, Operands)) {
5673 return false;
5674 }
5675 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5676 return false;
5677 }
5678
5679 if (!validateAGPRLdSt(Inst)) {
5680 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5681 ? "invalid register class: data and dst should be all VGPR or AGPR"
5682 : "invalid register class: agpr loads and stores not supported on this GPU"
5683 );
5684 return false;
5685 }
5686 if (!validateVGPRAlign(Inst)) {
5687 Error(IDLoc,
5688 "invalid register class: vgpr tuples must be 64 bit aligned");
5689 return false;
5690 }
5691 if (!validateDS(Inst, Operands)) {
5692 return false;
5693 }
5694
5695 if (!validateBLGP(Inst, Operands)) {
5696 return false;
5697 }
5698
5699 if (!validateDivScale(Inst)) {
5700 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5701 return false;
5702 }
5703 if (!validateWaitCnt(Inst, Operands)) {
5704 return false;
5705 }
5706 if (!validateTFE(Inst, Operands)) {
5707 return false;
5708 }
5709 if (!validateWMMA(Inst, Operands)) {
5710 return false;
5711 }
5712
5713 return true;
5714}
5715
5716static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5717 const FeatureBitset &FBS,
5718 unsigned VariantID = 0);
5719
5720static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5721 const FeatureBitset &AvailableFeatures,
5722 unsigned VariantID);
5723
5724bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5725 const FeatureBitset &FBS) {
5726 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5727}
5728
5729bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5730 const FeatureBitset &FBS,
5731 ArrayRef<unsigned> Variants) {
5732 for (auto Variant : Variants) {
5733 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5734 return true;
5735 }
5736
5737 return false;
5738}
5739
5740bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5741 const SMLoc &IDLoc) {
5742 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5743
5744 // Check if requested instruction variant is supported.
5745 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5746 return false;
5747
5748 // This instruction is not supported.
5749 // Clear any other pending errors because they are no longer relevant.
5750 getParser().clearPendingErrors();
5751
5752 // Requested instruction variant is not supported.
5753 // Check if any other variants are supported.
5754 StringRef VariantName = getMatchedVariantName();
5755 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5756 return Error(IDLoc,
5757 Twine(VariantName,
5758 " variant of this instruction is not supported"));
5759 }
5760
5761 // Check if this instruction may be used with a different wavesize.
5762 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5763 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5764
5765 FeatureBitset FeaturesWS32 = getFeatureBits();
5766 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5767 .flip(AMDGPU::FeatureWavefrontSize32);
5768 FeatureBitset AvailableFeaturesWS32 =
5769 ComputeAvailableFeatures(FeaturesWS32);
5770
5771 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5772 return Error(IDLoc, "instruction requires wavesize=32");
5773 }
5774
5775 // Finally check if this instruction is supported on any other GPU.
5776 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5777 return Error(IDLoc, "instruction not supported on this GPU");
5778 }
5779
5780 // Instruction not supported on any GPU. Probably a typo.
5781 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5782 return Error(IDLoc, "invalid instruction" + Suggestion);
5783}
5784
5785static bool isInvalidVOPDY(const OperandVector &Operands,
5786 uint64_t InvalidOprIdx) {
5787 assert(InvalidOprIdx < Operands.size());
5788 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5789 if (Op.isToken() && InvalidOprIdx > 1) {
5790 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5791 return PrevOp.isToken() && PrevOp.getToken() == "::";
5792 }
5793 return false;
5794}
5795
5796bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5797 OperandVector &Operands,
5798 MCStreamer &Out,
5799 uint64_t &ErrorInfo,
5800 bool MatchingInlineAsm) {
5801 MCInst Inst;
5802 unsigned Result = Match_Success;
5803 for (auto Variant : getMatchedVariants()) {
5804 uint64_t EI;
5805 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5806 Variant);
5807 // We order match statuses from least to most specific and use the most
5808 // specific status as the result:
5809 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5810 if (R == Match_Success || R == Match_MissingFeature ||
5811 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5812 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5813 Result != Match_MissingFeature)) {
5814 Result = R;
5815 ErrorInfo = EI;
5816 }
5817 if (R == Match_Success)
5818 break;
5819 }
5820
5821 if (Result == Match_Success) {
5822 if (!validateInstruction(Inst, IDLoc, Operands)) {
5823 return true;
5824 }
5825 Inst.setLoc(IDLoc);
5826 Out.emitInstruction(Inst, getSTI());
5827 return false;
5828 }
5829
5830 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5831 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5832 return true;
5833 }
5834
5835 switch (Result) {
5836 default: break;
5837 case Match_MissingFeature:
5838 // It has been verified that the specified instruction
5839 // mnemonic is valid. A match was found but it requires
5840 // features which are not supported on this GPU.
5841 return Error(IDLoc, "operands are not valid for this GPU or mode");
5842
5843 case Match_InvalidOperand: {
5844 SMLoc ErrorLoc = IDLoc;
5845 if (ErrorInfo != ~0ULL) {
5846 if (ErrorInfo >= Operands.size()) {
5847 return Error(IDLoc, "too few operands for instruction");
5848 }
5849 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5850 if (ErrorLoc == SMLoc())
5851 ErrorLoc = IDLoc;
5852
5853 if (isInvalidVOPDY(Operands, ErrorInfo))
5854 return Error(ErrorLoc, "invalid VOPDY instruction");
5855 }
5856 return Error(ErrorLoc, "invalid operand for instruction");
5857 }
5858
5859 case Match_MnemonicFail:
5860 llvm_unreachable("Invalid instructions should have been handled already");
5861 }
5862 llvm_unreachable("Implement any new match types added!");
5863}
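// Editorial sketch (not part of the parser): the variant loop above keeps the
// most specific status seen across encoding variants, i.e. it ranks
//   Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_Success
// and keeps the higher-ranked result. Conceptually:
//   unsigned rank(unsigned S) {
//     switch (S) {
//     case Match_Success:        return 3;
//     case Match_MissingFeature: return 2;
//     case Match_InvalidOperand: return 1;
//     default:                   return 0; // Match_MnemonicFail
//     }
//   }
//   // R replaces Result roughly whenever rank(R) >= rank(Result).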
5864
5865bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5866 int64_t Tmp = -1;
5867 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5868 return true;
5869 }
5870 if (getParser().parseAbsoluteExpression(Tmp)) {
5871 return true;
5872 }
5873 Ret = static_cast<uint32_t>(Tmp);
5874 return false;
5875}
5876
5877bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5878 if (!getSTI().getTargetTriple().isAMDGCN())
5879 return TokError("directive only supported for amdgcn architecture");
5880
5881 std::string TargetIDDirective;
5882 SMLoc TargetStart = getTok().getLoc();
5883 if (getParser().parseEscapedString(TargetIDDirective))
5884 return true;
5885
5886 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5887 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5888 return getParser().Error(TargetRange.Start,
5889 (Twine(".amdgcn_target directive's target id ") +
5890 Twine(TargetIDDirective) +
5891 Twine(" does not match the specified target id ") +
5892 Twine(getTargetStreamer().getTargetID()->toString())).str());
5893
5894 return false;
5895}
5896
5897bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5898 return Error(Range.Start, "value out of range", Range);
5899}
5900
5901bool AMDGPUAsmParser::calculateGPRBlocks(
5902 const FeatureBitset &Features, const MCExpr *VCCUsed,
5903 const MCExpr *FlatScrUsed, bool XNACKUsed,
5904 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5905 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5906 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5907 // TODO(scott.linder): These calculations are duplicated from
5908 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5909 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5910 MCContext &Ctx = getContext();
5911
5912 const MCExpr *NumSGPRs = NextFreeSGPR;
5913 int64_t EvaluatedSGPRs;
5914
5915 if (Version.Major >= 10)
5916 NumSGPRs = MCConstantExpr::create(0, Ctx);
5917 else {
5918 unsigned MaxAddressableNumSGPRs =
5919 IsaInfo::getAddressableNumSGPRs(&getSTI());
5920
5921 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5922 !Features.test(FeatureSGPRInitBug) &&
5923 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5924 return OutOfRangeError(SGPRRange);
5925
5926 const MCExpr *ExtraSGPRs =
5927 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5928 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5929
5930 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5931 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5932 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5933 return OutOfRangeError(SGPRRange);
5934
5935 if (Features.test(FeatureSGPRInitBug))
5936 NumSGPRs =
5937 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5938 }
5939
5940 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5941 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
5942 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5943 unsigned Granule) -> const MCExpr * {
5944 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5945 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5946 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5947 const MCExpr *AlignToGPR =
5948 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5949 const MCExpr *DivGPR =
5950 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5951 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5952 return SubGPR;
5953 };
5954
5955 VGPRBlocks = GetNumGPRBlocks(
5956 NextFreeVGPR,
5957 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5958 SGPRBlocks =
5959 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5960
5961 return false;
5962}
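// Editorial sketch (not part of the source): the scalar arithmetic that the
// GetNumGPRBlocks lambda above expresses with MCExprs. With NumGPR = 37 and an
// encoding granule of 8 (illustrative values):
//   alignTo(max(1, 37), 8) = 40, and 40 / 8 - 1 = 4 blocks.
//   unsigned getNumGPRBlocksScalar(unsigned NumGPR, unsigned Granule) {
//     unsigned N = std::max(1u, NumGPR);
//     unsigned Aligned = (N + Granule - 1) / Granule * Granule; // alignTo
//     return Aligned / Granule - 1;
//   }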
5963
5964bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5965 if (!getSTI().getTargetTriple().isAMDGCN())
5966 return TokError("directive only supported for amdgcn architecture");
5967
5968 if (!isHsaAbi(getSTI()))
5969 return TokError("directive only supported for amdhsa OS");
5970
5971 StringRef KernelName;
5972 if (getParser().parseIdentifier(KernelName))
5973 return true;
5974
5975 AMDGPU::MCKernelDescriptor KD =
5976 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5977 &getSTI(), getContext());
5978
5979 StringSet<> Seen;
5980
5981 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5982
5983 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5984 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5985
5986 SMRange VGPRRange;
5987 const MCExpr *NextFreeVGPR = ZeroExpr;
5988 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5989 const MCExpr *NamedBarCnt = ZeroExpr;
5990 uint64_t SharedVGPRCount = 0;
5991 uint64_t PreloadLength = 0;
5992 uint64_t PreloadOffset = 0;
5993 SMRange SGPRRange;
5994 const MCExpr *NextFreeSGPR = ZeroExpr;
5995
5996 // Count the number of user SGPRs implied from the enabled feature bits.
5997 unsigned ImpliedUserSGPRCount = 0;
5998
5999 // Track if the asm explicitly contains the directive for the user SGPR
6000 // count.
6001 std::optional<unsigned> ExplicitUserSGPRCount;
6002 const MCExpr *ReserveVCC = OneExpr;
6003 const MCExpr *ReserveFlatScr = OneExpr;
6004 std::optional<bool> EnableWavefrontSize32;
6005
6006 while (true) {
6007 while (trySkipToken(AsmToken::EndOfStatement));
6008
6009 StringRef ID;
6010 SMRange IDRange = getTok().getLocRange();
6011 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
6012 return true;
6013
6014 if (ID == ".end_amdhsa_kernel")
6015 break;
6016
6017 if (!Seen.insert(ID).second)
6018 return TokError(".amdhsa_ directives cannot be repeated");
6019
6020 SMLoc ValStart = getLoc();
6021 const MCExpr *ExprVal;
6022 if (getParser().parseExpression(ExprVal))
6023 return true;
6024 SMLoc ValEnd = getLoc();
6025 SMRange ValRange = SMRange(ValStart, ValEnd);
6026
6027 int64_t IVal = 0;
6028 uint64_t Val = IVal;
6029 bool EvaluatableExpr;
6030 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6031 if (IVal < 0)
6032 return OutOfRangeError(ValRange);
6033 Val = IVal;
6034 }
6035
6036#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6037 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6038 return OutOfRangeError(RANGE); \
6039 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6040 getContext());
6041
6042// Some fields use the parsed value immediately which requires the expression to
6043// be solvable.
6044#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6045 if (!(RESOLVED)) \
6046 return Error(IDRange.Start, "directive should have resolvable expression", \
6047 IDRange);
6048
6049 if (ID == ".amdhsa_group_segment_fixed_size") {
6050 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
6051 CHAR_BIT>(Val))
6052 return OutOfRangeError(ValRange);
6053 KD.group_segment_fixed_size = ExprVal;
6054 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6055 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
6056 CHAR_BIT>(Val))
6057 return OutOfRangeError(ValRange);
6058 KD.private_segment_fixed_size = ExprVal;
6059 } else if (ID == ".amdhsa_kernarg_size") {
6060 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6061 return OutOfRangeError(ValRange);
6062 KD.kernarg_size = ExprVal;
6063 } else if (ID == ".amdhsa_user_sgpr_count") {
6064 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6065 ExplicitUserSGPRCount = Val;
6066 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6067 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6068 if (hasArchitectedFlatScratch())
6069 return Error(IDRange.Start,
6070 "directive is not supported with architected flat scratch",
6071 IDRange);
6072 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6073 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6074 ExprVal, ValRange);
6075 if (Val)
6076 ImpliedUserSGPRCount += 4;
6077 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6078 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6079 if (!hasKernargPreload())
6080 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6081
6082 if (Val > getMaxNumUserSGPRs())
6083 return OutOfRangeError(ValRange);
6084 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6085 ValRange);
6086 if (Val) {
6087 ImpliedUserSGPRCount += Val;
6088 PreloadLength = Val;
6089 }
6090 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6091 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6092 if (!hasKernargPreload())
6093 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6094
6095 if (Val >= 1024)
6096 return OutOfRangeError(ValRange);
6097 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6098 ValRange);
6099 if (Val)
6100 PreloadOffset = Val;
6101 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6102 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6103 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6104 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6105 ValRange);
6106 if (Val)
6107 ImpliedUserSGPRCount += 2;
6108 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6109 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6110 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6111 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6112 ValRange);
6113 if (Val)
6114 ImpliedUserSGPRCount += 2;
6115 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6116 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6117 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6118 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6119 ExprVal, ValRange);
6120 if (Val)
6121 ImpliedUserSGPRCount += 2;
6122 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6123 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6124 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6125 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6126 ValRange);
6127 if (Val)
6128 ImpliedUserSGPRCount += 2;
6129 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6130 if (hasArchitectedFlatScratch())
6131 return Error(IDRange.Start,
6132 "directive is not supported with architected flat scratch",
6133 IDRange);
6134 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6135 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6136 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6137 ExprVal, ValRange);
6138 if (Val)
6139 ImpliedUserSGPRCount += 2;
6140 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6141 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6142 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6143 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6144 ExprVal, ValRange);
6145 if (Val)
6146 ImpliedUserSGPRCount += 1;
6147 } else if (ID == ".amdhsa_uses_cu_stores") {
6148 if (!isGFX1250())
6149 return Error(IDRange.Start, "directive requires gfx12.5", IDRange);
6150
6151 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6152 KERNEL_CODE_PROPERTY_USES_CU_STORES, ExprVal, ValRange);
6153 } else if (ID == ".amdhsa_wavefront_size32") {
6154 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6155 if (IVersion.Major < 10)
6156 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6157 EnableWavefrontSize32 = Val;
6158 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6159 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6160 ValRange);
6161 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6162 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6163 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6164 ValRange);
6165 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6166 if (hasArchitectedFlatScratch())
6167 return Error(IDRange.Start,
6168 "directive is not supported with architected flat scratch",
6169 IDRange);
6170 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6171 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6172 ValRange);
6173 } else if (ID == ".amdhsa_enable_private_segment") {
6174 if (!hasArchitectedFlatScratch())
6175 return Error(
6176 IDRange.Start,
6177 "directive is not supported without architected flat scratch",
6178 IDRange);
6179 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6180 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6181 ValRange);
6182 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6183 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6184 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6185 ValRange);
6186 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6187 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6188 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6189 ValRange);
6190 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6191 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6192 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6193 ValRange);
6194 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6195 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6196 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6197 ValRange);
6198 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6199 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6200 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6201 ValRange);
6202 } else if (ID == ".amdhsa_next_free_vgpr") {
6203 VGPRRange = ValRange;
6204 NextFreeVGPR = ExprVal;
6205 } else if (ID == ".amdhsa_next_free_sgpr") {
6206 SGPRRange = ValRange;
6207 NextFreeSGPR = ExprVal;
6208 } else if (ID == ".amdhsa_accum_offset") {
6209 if (!isGFX90A())
6210 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6211 AccumOffset = ExprVal;
6212 } else if (ID == ".amdhsa_named_barrier_count") {
6213 if (!isGFX1250())
6214 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6215 NamedBarCnt = ExprVal;
6216 } else if (ID == ".amdhsa_reserve_vcc") {
6217 if (EvaluatableExpr && !isUInt<1>(Val))
6218 return OutOfRangeError(ValRange);
6219 ReserveVCC = ExprVal;
6220 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6221 if (IVersion.Major < 7)
6222 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6224 return Error(IDRange.Start,
6225 "directive is not supported with architected flat scratch",
6226 IDRange);
6227 if (EvaluatableExpr && !isUInt<1>(Val))
6228 return OutOfRangeError(ValRange);
6229 ReserveFlatScr = ExprVal;
6230 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6231 if (IVersion.Major < 8)
6232 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6233 if (!isUInt<1>(Val))
6234 return OutOfRangeError(ValRange);
6235 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6236 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6237 IDRange);
6238 } else if (ID == ".amdhsa_float_round_mode_32") {
6239 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6240 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6241 ValRange);
6242 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6243 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6244 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6245 ValRange);
6246 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6247 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6248 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6249 ValRange);
6250 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6251 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6252 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6253 ValRange);
6254 } else if (ID == ".amdhsa_dx10_clamp") {
6255 if (IVersion.Major >= 12)
6256 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6257 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6258 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6259 ValRange);
6260 } else if (ID == ".amdhsa_ieee_mode") {
6261 if (IVersion.Major >= 12)
6262 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6263 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6264 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6265 ValRange);
6266 } else if (ID == ".amdhsa_fp16_overflow") {
6267 if (IVersion.Major < 9)
6268 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6269 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6270 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6271 ValRange);
6272 } else if (ID == ".amdhsa_tg_split") {
6273 if (!isGFX90A())
6274 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6275 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6276 ExprVal, ValRange);
6277 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6278 if (!supportsWGP(getSTI()))
6279 return Error(IDRange.Start,
6280 "directive unsupported on " + getSTI().getCPU(), IDRange);
6281 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6282 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6283 ValRange);
6284 } else if (ID == ".amdhsa_memory_ordered") {
6285 if (IVersion.Major < 10)
6286 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6287 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6288 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6289 ValRange);
6290 } else if (ID == ".amdhsa_forward_progress") {
6291 if (IVersion.Major < 10)
6292 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6293 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6294 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6295 ValRange);
6296 } else if (ID == ".amdhsa_shared_vgpr_count") {
6297 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6298 if (IVersion.Major < 10 || IVersion.Major >= 12)
6299 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6300 IDRange);
6301 SharedVGPRCount = Val;
6302 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6303 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6304 ValRange);
6305 } else if (ID == ".amdhsa_inst_pref_size") {
6306 if (IVersion.Major < 11)
6307 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6308 if (IVersion.Major == 11) {
6309 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6310 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6311 ValRange);
6312 } else {
6313 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6314 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6315 ValRange);
6316 }
6317 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6318 PARSE_BITS_ENTRY(
6319 KD.compute_pgm_rsrc2,
6320 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6321 ExprVal, ValRange);
6322 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6323 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6324 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6325 ExprVal, ValRange);
6326 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6327 PARSE_BITS_ENTRY(
6328 KD.compute_pgm_rsrc2,
6329 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6330 ExprVal, ValRange);
6331 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6332 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6333 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6334 ExprVal, ValRange);
6335 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6336 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6337 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6338 ExprVal, ValRange);
6339 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6340 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6341 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6342 ExprVal, ValRange);
6343 } else if (ID == ".amdhsa_exception_int_div_zero") {
6344 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6345 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6346 ExprVal, ValRange);
6347 } else if (ID == ".amdhsa_round_robin_scheduling") {
6348 if (IVersion.Major < 12)
6349 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6350 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6351 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6352 ValRange);
6353 } else {
6354 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6355 }
6356
6357#undef PARSE_BITS_ENTRY
6358 }
6359
6360 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6361 return TokError(".amdhsa_next_free_vgpr directive is required");
6362
6363 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6364 return TokError(".amdhsa_next_free_sgpr directive is required");
6365
6366 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6367
6368 // Consider the case where the total number of UserSGPRs with trailing
6369 // allocated preload SGPRs is greater than the number of explicitly
6370 // referenced SGPRs.
6371 if (PreloadLength) {
6372 MCContext &Ctx = getContext();
6373 NextFreeSGPR = AMDGPUMCExpr::createMax(
6374 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6375 }
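  // Editorial example (illustrative values): with 6 user SGPRs enabled and 4
  // preloaded kernarg SGPRs, UserSGPRCount is 10; even if the kernel only
  // declares .amdhsa_next_free_sgpr 6, the max() above raises the SGPR
  // allocation so it still covers all 10 user SGPRs.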
6376
6377 const MCExpr *VGPRBlocks;
6378 const MCExpr *SGPRBlocks;
6379 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6380 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6381 EnableWavefrontSize32, NextFreeVGPR,
6382 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6383 SGPRBlocks))
6384 return true;
6385
6386 int64_t EvaluatedVGPRBlocks;
6387 bool VGPRBlocksEvaluatable =
6388 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6389 if (VGPRBlocksEvaluatable &&
6390 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6391 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6392 return OutOfRangeError(VGPRRange);
6393 }
6394 AMDGPU::MCKernelDescriptor::bits_set(
6395 KD.compute_pgm_rsrc1, VGPRBlocks,
6396 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6397 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6398
6399 int64_t EvaluatedSGPRBlocks;
6400 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6401 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6402 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6403 return OutOfRangeError(SGPRRange);
6404 AMDGPU::MCKernelDescriptor::bits_set(
6405 KD.compute_pgm_rsrc1, SGPRBlocks,
6406 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6407 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6408
6409 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6410 return TokError("amdgpu_user_sgpr_count smaller than implied by "
6411 "enabled user SGPRs");
6412
6413 if (isGFX1250()) {
6414 if (!isUInt<COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
6415 return TokError("too many user SGPRs enabled");
6416 AMDGPU::MCKernelDescriptor::bits_set(
6417 KD.compute_pgm_rsrc2,
6418 MCConstantExpr::create(UserSGPRCount, getContext()),
6419 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6420 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6421 } else {
6422 if (!isUInt<COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_WIDTH>(
6423 UserSGPRCount))
6424 return TokError("too many user SGPRs enabled");
6425 AMDGPU::MCKernelDescriptor::bits_set(
6426 KD.compute_pgm_rsrc2,
6427 MCConstantExpr::create(UserSGPRCount, getContext()),
6428 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6429 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6430 }
6431
6432 int64_t IVal = 0;
6433 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6434 return TokError("Kernarg size should be resolvable");
6435 uint64_t kernarg_size = IVal;
6436 if (PreloadLength && kernarg_size &&
6437 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6438 return TokError("Kernarg preload length + offset is larger than the "
6439 "kernarg segment size");
6440
6441 if (isGFX90A()) {
6442 if (!Seen.contains(".amdhsa_accum_offset"))
6443 return TokError(".amdhsa_accum_offset directive is required");
6444 int64_t EvaluatedAccum;
6445 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6446 uint64_t UEvaluatedAccum = EvaluatedAccum;
6447 if (AccumEvaluatable &&
6448 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6449 return TokError("accum_offset should be in range [4..256] in "
6450 "increments of 4");
6451
6452 int64_t EvaluatedNumVGPR;
6453 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6454 AccumEvaluatable &&
6455 UEvaluatedAccum >
6456 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6457 return TokError("accum_offset exceeds total VGPR allocation");
6458 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6459 MCBinaryExpr::createDiv(
6460 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6461 MCConstantExpr::create(1, getContext()), getContext());
6462 AMDGPU::MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
6463 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6464 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6465 getContext());
6466 }
6467
6468 if (isGFX1250())
6469 AMDGPU::MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, NamedBarCnt,
6470 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6471 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6472 getContext());
6473
6474 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6475 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
6476 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6477 return TokError("shared_vgpr_count directive not valid on "
6478 "wavefront size 32");
6479 }
6480
6481 if (VGPRBlocksEvaluatable &&
6482 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6483 63)) {
6484 return TokError("shared_vgpr_count*2 + "
6485 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6486 "exceed 63\n");
6487 }
6488 }
6489
6490 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6491 NextFreeVGPR, NextFreeSGPR,
6492 ReserveVCC, ReserveFlatScr);
6493 return false;
6494}
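// Editorial example of the directive block this routine parses (kernel name and
// values are arbitrary; only .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr
// are mandatory, plus .amdhsa_accum_offset on gfx90a):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel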
6495
6496bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6497 uint32_t Version;
6498 if (ParseAsAbsoluteExpression(Version))
6499 return true;
6500
6501 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6502 return false;
6503}
6504
6505bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6506 AMDGPUMCKernelCodeT &C) {
6507 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6508 // assembly for backwards compatibility.
6509 if (ID == "max_scratch_backing_memory_byte_size") {
6510 Parser.eatToEndOfStatement();
6511 return false;
6512 }
6513
6514 SmallString<40> ErrStr;
6515 raw_svector_ostream Err(ErrStr);
6516 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6517 return TokError(Err.str());
6518 }
6519 Lex();
6520
6521 if (ID == "enable_wavefront_size32") {
6522 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6523 if (!isGFX10Plus())
6524 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6525 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6526 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6527 } else {
6528 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6529 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6530 }
6531 }
6532
6533 if (ID == "wavefront_size") {
6534 if (C.wavefront_size == 5) {
6535 if (!isGFX10Plus())
6536 return TokError("wavefront_size=5 is only allowed on GFX10+");
6537 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6538 return TokError("wavefront_size=5 requires +WavefrontSize32");
6539 } else if (C.wavefront_size == 6) {
6540 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6541 return TokError("wavefront_size=6 requires +WavefrontSize64");
6542 }
6543 }
6544
6545 return false;
6546}
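// Editorial note: amd_kernel_code_t stores wavefront_size as a log2 value, so
// the checks above treat wavefront_size=5 as wave32 (GFX10+ with
// +WavefrontSize32) and wavefront_size=6 as wave64 (+WavefrontSize64).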
6547
6548bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6549 AMDGPUMCKernelCodeT KernelCode;
6550 KernelCode.initDefault(&getSTI(), getContext());
6551
6552 while (true) {
6553 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6554 // will set the current token to EndOfStatement.
6555 while(trySkipToken(AsmToken::EndOfStatement));
6556
6557 StringRef ID;
6558 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6559 return true;
6560
6561 if (ID == ".end_amd_kernel_code_t")
6562 break;
6563
6564 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6565 return true;
6566 }
6567
6568 KernelCode.validate(&getSTI(), getContext());
6569 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6570
6571 return false;
6572}
6573
6574bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6575 StringRef KernelName;
6576 if (!parseId(KernelName, "expected symbol name"))
6577 return true;
6578
6579 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6580 ELF::STT_AMDGPU_HSA_KERNEL);
6581
6582 KernelScope.initialize(getContext());
6583 return false;
6584}
6585
6586bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6587 if (!getSTI().getTargetTriple().isAMDGCN()) {
6588 return Error(getLoc(),
6589 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6590 "architectures");
6591 }
6592
6593 auto TargetIDDirective = getLexer().getTok().getStringContents();
6594 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6595 return Error(getParser().getTok().getLoc(), "target id must match options");
6596
6597 getTargetStreamer().EmitISAVersion();
6598 Lex();
6599
6600 return false;
6601}
6602
6603bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6604 assert(isHsaAbi(getSTI()));
6605
6606 std::string HSAMetadataString;
6607 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6608 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6609 return true;
6610
6611 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6612 return Error(getLoc(), "invalid HSA metadata");
6613
6614 return false;
6615}
6616
6617/// Common code to parse out a block of text (typically YAML) between start and
6618/// end directives.
6619bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6620 const char *AssemblerDirectiveEnd,
6621 std::string &CollectString) {
6622
6623 raw_string_ostream CollectStream(CollectString);
6624
6625 getLexer().setSkipSpace(false);
6626
6627 bool FoundEnd = false;
6628 while (!isToken(AsmToken::Eof)) {
6629 while (isToken(AsmToken::Space)) {
6630 CollectStream << getTokenStr();
6631 Lex();
6632 }
6633
6634 if (trySkipId(AssemblerDirectiveEnd)) {
6635 FoundEnd = true;
6636 break;
6637 }
6638
6639 CollectStream << Parser.parseStringToEndOfStatement()
6640 << getContext().getAsmInfo()->getSeparatorString();
6641
6642 Parser.eatToEndOfStatement();
6643 }
6644
6645 getLexer().setSkipSpace(true);
6646
6647 if (isToken(AsmToken::Eof) && !FoundEnd) {
6648 return TokError(Twine("expected directive ") +
6649 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6650 }
6651
6652 return false;
6653}
6654
6655/// Parse the assembler directive for new MsgPack-format PAL metadata.
6656bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6657 std::string String;
6658 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6659 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6660 return true;
6661
6662 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6663 if (!PALMetadata->setFromString(String))
6664 return Error(getLoc(), "invalid PAL metadata");
6665 return false;
6666}
6667
6668/// Parse the assembler directive for old linear-format PAL metadata.
6669bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6670 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6671 return Error(getLoc(),
6672 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6673 "not available on non-amdpal OSes")).str());
6674 }
6675
6676 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6677 PALMetadata->setLegacy();
6678 for (;;) {
6679 uint32_t Key, Value;
6680 if (ParseAsAbsoluteExpression(Key)) {
6681 return TokError(Twine("invalid value in ") +
6682 Twine(PALMD::AssemblerDirective));
6683 }
6684 if (!trySkipToken(AsmToken::Comma)) {
6685 return TokError(Twine("expected an even number of values in ") +
6686 Twine(PALMD::AssemblerDirective));
6687 }
6688 if (ParseAsAbsoluteExpression(Value)) {
6689 return TokError(Twine("invalid value in ") +
6690 Twine(PALMD::AssemblerDirective));
6691 }
6692 PALMetadata->setRegister(Key, Value);
6693 if (!trySkipToken(AsmToken::Comma))
6694 break;
6695 }
6696 return false;
6697}
6698
6699/// ParseDirectiveAMDGPULDS
6700/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6701bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6702 if (getParser().checkForValidSection())
6703 return true;
6704
6705 StringRef Name;
6706 SMLoc NameLoc = getLoc();
6707 if (getParser().parseIdentifier(Name))
6708 return TokError("expected identifier in directive");
6709
6710 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6711 if (getParser().parseComma())
6712 return true;
6713
6714 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6715
6716 int64_t Size;
6717 SMLoc SizeLoc = getLoc();
6718 if (getParser().parseAbsoluteExpression(Size))
6719 return true;
6720 if (Size < 0)
6721 return Error(SizeLoc, "size must be non-negative");
6722 if (Size > LocalMemorySize)
6723 return Error(SizeLoc, "size is too large");
6724
6725 int64_t Alignment = 4;
6726 if (trySkipToken(AsmToken::Comma)) {
6727 SMLoc AlignLoc = getLoc();
6728 if (getParser().parseAbsoluteExpression(Alignment))
6729 return true;
6730 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6731 return Error(AlignLoc, "alignment must be a power of two");
6732
6733 // Alignment larger than the size of LDS is possible in theory, as long
6734 // as the linker manages to place the symbol at address 0, but we do want
6735 // to make sure the alignment fits nicely into a 32-bit integer.
6736 if (Alignment >= 1u << 31)
6737 return Error(AlignLoc, "alignment is too large");
6738 }
6739
6740 if (parseEOL())
6741 return true;
6742
6743 Symbol->redefineIfPossible();
6744 if (!Symbol->isUndefined())
6745 return Error(NameLoc, "invalid symbol redefinition");
6746
6747 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6748 return false;
6749}
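// Editorial examples matching the grammar above (symbol names are hypothetical):
//   .amdgpu_lds my_lds_array, 4096, 16   ; 4096 bytes of LDS, 16-byte aligned
//   .amdgpu_lds small_buf, 512           ; alignment defaults to 4 bytes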
6750
6751bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6752 StringRef IDVal = DirectiveID.getString();
6753
6754 if (isHsaAbi(getSTI())) {
6755 if (IDVal == ".amdhsa_kernel")
6756 return ParseDirectiveAMDHSAKernel();
6757
6758 if (IDVal == ".amdhsa_code_object_version")
6759 return ParseDirectiveAMDHSACodeObjectVersion();
6760
6761 // TODO: Restructure/combine with PAL metadata directive.
6762 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6763 return ParseDirectiveHSAMetadata();
6764 } else {
6765 if (IDVal == ".amd_kernel_code_t")
6766 return ParseDirectiveAMDKernelCodeT();
6767
6768 if (IDVal == ".amdgpu_hsa_kernel")
6769 return ParseDirectiveAMDGPUHsaKernel();
6770
6771 if (IDVal == ".amd_amdgpu_isa")
6772 return ParseDirectiveISAVersion();
6773
6774 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6775 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6776 Twine(" directive is "
6777 "not available on non-amdhsa OSes"))
6778 .str());
6779 }
6780 }
6781
6782 if (IDVal == ".amdgcn_target")
6783 return ParseDirectiveAMDGCNTarget();
6784
6785 if (IDVal == ".amdgpu_lds")
6786 return ParseDirectiveAMDGPULDS();
6787
6788 if (IDVal == PALMD::AssemblerDirectiveBegin)
6789 return ParseDirectivePALMetadataBegin();
6790
6791 if (IDVal == PALMD::AssemblerDirective)
6792 return ParseDirectivePALMetadata();
6793
6794 return true;
6795}
6796
6797bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6798 MCRegister Reg) {
6799 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6800 return isGFX9Plus();
6801
6802 // GFX10+ has 2 more SGPRs 104 and 105.
6803 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6804 return hasSGPR104_SGPR105();
6805
6806 switch (Reg.id()) {
6807 case SRC_SHARED_BASE_LO:
6808 case SRC_SHARED_BASE:
6809 case SRC_SHARED_LIMIT_LO:
6810 case SRC_SHARED_LIMIT:
6811 case SRC_PRIVATE_BASE_LO:
6812 case SRC_PRIVATE_BASE:
6813 case SRC_PRIVATE_LIMIT_LO:
6814 case SRC_PRIVATE_LIMIT:
6815 return isGFX9Plus();
6816 case SRC_FLAT_SCRATCH_BASE_LO:
6817 case SRC_FLAT_SCRATCH_BASE_HI:
6818 return hasGloballyAddressableScratch();
6819 case SRC_POPS_EXITING_WAVE_ID:
6820 return isGFX9Plus() && !isGFX11Plus();
6821 case TBA:
6822 case TBA_LO:
6823 case TBA_HI:
6824 case TMA:
6825 case TMA_LO:
6826 case TMA_HI:
6827 return !isGFX9Plus();
6828 case XNACK_MASK:
6829 case XNACK_MASK_LO:
6830 case XNACK_MASK_HI:
6831 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6832 case SGPR_NULL:
6833 return isGFX10Plus();
6834 case SRC_EXECZ:
6835 case SRC_VCCZ:
6836 return !isGFX11Plus();
6837 default:
6838 break;
6839 }
6840
6841 if (isCI())
6842 return true;
6843
6844 if (isSI() || isGFX10Plus()) {
6845 // No flat_scr on SI.
6846 // On GFX10Plus flat scratch is not a valid register operand and can only be
6847 // accessed with s_setreg/s_getreg.
6848 switch (Reg.id()) {
6849 case FLAT_SCR:
6850 case FLAT_SCR_LO:
6851 case FLAT_SCR_HI:
6852 return false;
6853 default:
6854 return true;
6855 }
6856 }
6857
6858 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6859 // SI/CI have.
6860 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6861 return hasSGPR102_SGPR103();
6862
6863 return true;
6864}
6865
6866ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6867 StringRef Mnemonic,
6868 OperandMode Mode) {
6869 ParseStatus Res = parseVOPD(Operands);
6870 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6871 return Res;
6872
6873 // Try to parse with a custom parser
6874 Res = MatchOperandParserImpl(Operands, Mnemonic);
6875
6876 // If we successfully parsed the operand or if there was an error parsing,
6877 // we are done.
6878 //
6879 // If we are parsing after we reach EndOfStatement then this means we
6880 // are appending default values to the Operands list. This is only done
6881 // by custom parser, so we shouldn't continue on to the generic parsing.
6882 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6883 return Res;
6884
6885 SMLoc RBraceLoc;
6886 SMLoc LBraceLoc = getLoc();
6887 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6888 unsigned Prefix = Operands.size();
6889
6890 for (;;) {
6891 auto Loc = getLoc();
6892 Res = parseReg(Operands);
6893 if (Res.isNoMatch())
6894 Error(Loc, "expected a register");
6895 if (!Res.isSuccess())
6896 return ParseStatus::Failure;
6897
6898 RBraceLoc = getLoc();
6899 if (trySkipToken(AsmToken::RBrac))
6900 break;
6901
6902 if (!skipToken(AsmToken::Comma,
6903 "expected a comma or a closing square bracket"))
6904 return ParseStatus::Failure;
6905 }
6906
6907 if (Operands.size() - Prefix > 1) {
6908 Operands.insert(Operands.begin() + Prefix,
6909 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6910 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6911 }
6912
6913 return ParseStatus::Success;
6914 }
6915
6916 return parseRegOrImm(Operands);
6917}
6918
6919StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6920 // Clear any forced encodings from the previous instruction.
6921 setForcedEncodingSize(0);
6922 setForcedDPP(false);
6923 setForcedSDWA(false);
6924
6925 if (Name.consume_back("_e64_dpp")) {
6926 setForcedDPP(true);
6927 setForcedEncodingSize(64);
6928 return Name;
6929 }
6930 if (Name.consume_back("_e64")) {
6931 setForcedEncodingSize(64);
6932 return Name;
6933 }
6934 if (Name.consume_back("_e32")) {
6935 setForcedEncodingSize(32);
6936 return Name;
6937 }
6938 if (Name.consume_back("_dpp")) {
6939 setForcedDPP(true);
6940 return Name;
6941 }
6942 if (Name.consume_back("_sdwa")) {
6943 setForcedSDWA(true);
6944 return Name;
6945 }
6946 return Name;
6947}
6948
6949static void applyMnemonicAliases(StringRef &Mnemonic,
6950 const FeatureBitset &Features,
6951 unsigned VariantID);
6952
6953bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6954 StringRef Name, SMLoc NameLoc,
6955 OperandVector &Operands) {
6956 // Add the instruction mnemonic
6957 Name = parseMnemonicSuffix(Name);
6958
6959 // If the target architecture uses MnemonicAlias, call it here to parse
6960 // operands correctly.
6961 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6962
6963 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6964
6965 bool IsMIMG = Name.starts_with("image_");
6966
6967 while (!trySkipToken(AsmToken::EndOfStatement)) {
6968 OperandMode Mode = OperandMode_Default;
6969 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6970 Mode = OperandMode_NSA;
6971 ParseStatus Res = parseOperand(Operands, Name, Mode);
6972
6973 if (!Res.isSuccess()) {
6974 checkUnsupportedInstruction(Name, NameLoc);
6975 if (!Parser.hasPendingError()) {
6976 // FIXME: use real operand location rather than the current location.
6977 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6978 : "not a valid operand.";
6979 Error(getLoc(), Msg);
6980 }
6981 while (!trySkipToken(AsmToken::EndOfStatement)) {
6982 lex();
6983 }
6984 return true;
6985 }
6986
6987 // Eat the comma or space if there is one.
6988 trySkipToken(AsmToken::Comma);
6989 }
6990
6991 return false;
6992}
6993
6994//===----------------------------------------------------------------------===//
6995// Utility functions
6996//===----------------------------------------------------------------------===//
6997
6998ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6999 OperandVector &Operands) {
7000 SMLoc S = getLoc();
7001 if (!trySkipId(Name))
7002 return ParseStatus::NoMatch;
7003
7004 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
7005 return ParseStatus::Success;
7006}
7007
7008ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
7009 int64_t &IntVal) {
7010
7011 if (!trySkipId(Prefix, AsmToken::Colon))
7012 return ParseStatus::NoMatch;
7013
7014 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
7015}
7016
7017ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7018 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7019 std::function<bool(int64_t &)> ConvertResult) {
7020 SMLoc S = getLoc();
7021 int64_t Value = 0;
7022
7023 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
7024 if (!Res.isSuccess())
7025 return Res;
7026
7027 if (ConvertResult && !ConvertResult(Value)) {
7028 Error(S, "invalid " + StringRef(Prefix) + " value.");
7029 }
7030
7031 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
7032 return ParseStatus::Success;
7033}
7034
7035ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7036 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7037 bool (*ConvertResult)(int64_t &)) {
7038 SMLoc S = getLoc();
7039 if (!trySkipId(Prefix, AsmToken::Colon))
7040 return ParseStatus::NoMatch;
7041
7042 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
7043 return ParseStatus::Failure;
7044
7045 unsigned Val = 0;
7046 const unsigned MaxSize = 4;
7047
7048 // FIXME: How to verify the number of elements matches the number of src
7049 // operands?
7050 for (int I = 0; ; ++I) {
7051 int64_t Op;
7052 SMLoc Loc = getLoc();
7053 if (!parseExpr(Op))
7054 return ParseStatus::Failure;
7055
7056 if (Op != 0 && Op != 1)
7057 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7058
7059 Val |= (Op << I);
7060
7061 if (trySkipToken(AsmToken::RBrac))
7062 break;
7063
7064 if (I + 1 == MaxSize)
7065 return Error(getLoc(), "expected a closing square bracket");
7066
7067 if (!skipToken(AsmToken::Comma, "expected a comma"))
7068 return ParseStatus::Failure;
7069 }
7070
7071 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7072 return ParseStatus::Success;
7073}
7074
7075ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7076 OperandVector &Operands,
7077 AMDGPUOperand::ImmTy ImmTy) {
7078 int64_t Bit;
7079 SMLoc S = getLoc();
7080
7081 if (trySkipId(Name)) {
7082 Bit = 1;
7083 } else if (trySkipId("no", Name)) {
7084 Bit = 0;
7085 } else {
7086 return ParseStatus::NoMatch;
7087 }
7088
7089 if (Name == "r128" && !hasMIMG_R128())
7090 return Error(S, "r128 modifier is not supported on this GPU");
7091 if (Name == "a16" && !hasA16())
7092 return Error(S, "a16 modifier is not supported on this GPU");
7093
7094 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7095 ImmTy = AMDGPUOperand::ImmTyR128A16;
7096
7097 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7098 return ParseStatus::Success;
7099}
7100
7101unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7102 bool &Disabling) const {
7103 Disabling = Id.consume_front("no");
7104
7105 if (isGFX940() && !Mnemo.starts_with("s_")) {
7106 return StringSwitch<unsigned>(Id)
7107 .Case("nt", AMDGPU::CPol::NT)
7108 .Case("sc0", AMDGPU::CPol::SC0)
7109 .Case("sc1", AMDGPU::CPol::SC1)
7110 .Default(0);
7111 }
7112
7113 return StringSwitch<unsigned>(Id)
7114 .Case("dlc", AMDGPU::CPol::DLC)
7115 .Case("glc", AMDGPU::CPol::GLC)
7116 .Case("scc", AMDGPU::CPol::SCC)
7117 .Case("slc", AMDGPU::CPol::SLC)
7118 .Default(0);
7119}
7120
7121ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7122 if (isGFX12Plus()) {
7123 SMLoc StringLoc = getLoc();
7124
7125 int64_t CPolVal = 0;
7126 ParseStatus ResTH = ParseStatus::NoMatch;
7127 ParseStatus ResScope = ParseStatus::NoMatch;
7128 ParseStatus ResNV = ParseStatus::NoMatch;
7129 ParseStatus ResScal = ParseStatus::NoMatch;
7130
7131 for (;;) {
7132 if (ResTH.isNoMatch()) {
7133 int64_t TH;
7134 ResTH = parseTH(Operands, TH);
7135 if (ResTH.isFailure())
7136 return ResTH;
7137 if (ResTH.isSuccess()) {
7138 CPolVal |= TH;
7139 continue;
7140 }
7141 }
7142
7143 if (ResScope.isNoMatch()) {
7144 int64_t Scope;
7145 ResScope = parseScope(Operands, Scope);
7146 if (ResScope.isFailure())
7147 return ResScope;
7148 if (ResScope.isSuccess()) {
7149 CPolVal |= Scope;
7150 continue;
7151 }
7152 }
7153
7154 // The NV bit exists on GFX12+, but only has an effect starting from GFX1250.
7155 // Allow parsing on all GFX12 targets and fail during validation for better
7156 // diagnostics.
7157 if (ResNV.isNoMatch()) {
7158 if (trySkipId("nv")) {
7159 ResNV = ParseStatus::Success;
7160 CPolVal |= CPol::NV;
7161 continue;
7162 } else if (trySkipId("no", "nv")) {
7163 ResNV = ParseStatus::Success;
7164 continue;
7165 }
7166 }
7167
7168 if (ResScal.isNoMatch()) {
7169 if (trySkipId("scale_offset")) {
7170 ResScal = ParseStatus::Success;
7171 CPolVal |= CPol::SCAL;
7172 continue;
7173 } else if (trySkipId("no", "scale_offset")) {
7174 ResScal = ParseStatus::Success;
7175 continue;
7176 }
7177 }
7178
7179 break;
7180 }
7181
7182 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7183 ResScal.isNoMatch())
7184 return ParseStatus::NoMatch;
7185
7186 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7187 AMDGPUOperand::ImmTyCPol));
7188 return ParseStatus::Success;
7189 }
7190
7191 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7192 SMLoc OpLoc = getLoc();
7193 unsigned Enabled = 0, Seen = 0;
7194 for (;;) {
7195 SMLoc S = getLoc();
7196 bool Disabling;
7197 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7198 if (!CPol)
7199 break;
7200
7201 lex();
7202
7203 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7204 return Error(S, "dlc modifier is not supported on this GPU");
7205
7206 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7207 return Error(S, "scc modifier is not supported on this GPU");
7208
7209 if (Seen & CPol)
7210 return Error(S, "duplicate cache policy modifier");
7211
7212 if (!Disabling)
7213 Enabled |= CPol;
7214
7215 Seen |= CPol;
7216 }
7217
7218 if (!Seen)
7219 return ParseStatus::NoMatch;
7220
7221 Operands.push_back(
7222 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7223 return ParseStatus::Success;
7224}
7225
7226ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7227 int64_t &Scope) {
7228 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7229 CPol::SCOPE_DEV, CPol::SCOPE_SYS};
7230
7231 ParseStatus Res = parseStringOrIntWithPrefix(
7232 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7233 Scope);
7234
7235 if (Res.isSuccess())
7236 Scope = Scopes[Scope];
7237
7238 return Res;
7239}
7240
7241ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7242 TH = AMDGPU::CPol::TH_RT; // default
7243
7244 StringRef Value;
7245 SMLoc StringLoc;
7246 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7247 if (!Res.isSuccess())
7248 return Res;
7249
7250 if (Value == "TH_DEFAULT")
7251 TH = AMDGPU::CPol::TH_RT;
7252 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7253 Value == "TH_LOAD_NT_WB") {
7254 return Error(StringLoc, "invalid th value");
7255 } else if (Value.consume_front("TH_ATOMIC_")) {
7256 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
7257 } else if (Value.consume_front("TH_LOAD_")) {
7258 TH = AMDGPU::CPol::TH_TYPE_LOAD;
7259 } else if (Value.consume_front("TH_STORE_")) {
7260 TH = AMDGPU::CPol::TH_TYPE_STORE;
7261 } else {
7262 return Error(StringLoc, "invalid th value");
7263 }
7264
7265 if (Value == "BYPASS")
7266 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
7267
7268 if (TH != 0) {
7269 if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
7270 TH |= StringSwitch<int64_t>(Value)
7271 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7272 .Case("RT", AMDGPU::CPol::TH_RT)
7273 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7274 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7275 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7276 AMDGPU::CPol::TH_ATOMIC_RETURN)
7277 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7278 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7279 AMDGPU::CPol::TH_ATOMIC_NT)
7280 .Default(0xffffffff);
7281 else
7282 TH |= StringSwitch<int64_t>(Value)
7283 .Case("RT", AMDGPU::CPol::TH_RT)
7284 .Case("NT", AMDGPU::CPol::TH_NT)
7285 .Case("HT", AMDGPU::CPol::TH_HT)
7286 .Case("LU", AMDGPU::CPol::TH_LU)
7287 .Case("WB", AMDGPU::CPol::TH_WB)
7288 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7289 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7290 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7291 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7292 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7293 .Default(0xffffffff);
7294 }
7295
7296 if (TH == 0xffffffff)
7297 return Error(StringLoc, "invalid th value");
7298
7299 return ParseStatus::Success;
7300}
7301
7302static void
7303addOptionalImmOperand(MCInst& Inst, const OperandVector& Operands,
7304 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7305 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7306 std::optional<unsigned> InsertAt = std::nullopt) {
7307 auto i = OptionalIdx.find(ImmT);
7308 if (i != OptionalIdx.end()) {
7309 unsigned Idx = i->second;
7310 const AMDGPUOperand &Op =
7311 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7312 if (InsertAt)
7313 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7314 else
7315 Op.addImmOperands(Inst, 1);
7316 } else {
7317 if (InsertAt.has_value())
7318 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7319 else
7320 Inst.addOperand(MCOperand::createImm(Default));
7321 }
7322}
7323
7324ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7325 StringRef &Value,
7326 SMLoc &StringLoc) {
7327 if (!trySkipId(Prefix, AsmToken::Colon))
7328 return ParseStatus::NoMatch;
7329
7330 StringLoc = getLoc();
7331 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7332 : ParseStatus::Failure;
7333}
7334
7335ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7336 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7337 int64_t &IntVal) {
7338 if (!trySkipId(Name, AsmToken::Colon))
7339 return ParseStatus::NoMatch;
7340
7341 SMLoc StringLoc = getLoc();
7342
7343 StringRef Value;
7344 if (isToken(AsmToken::Identifier)) {
7345 Value = getTokenStr();
7346 lex();
7347
7348 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7349 if (Value == Ids[IntVal])
7350 break;
7351 } else if (!parseExpr(IntVal))
7352 return ParseStatus::Failure;
7353
7354 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7355 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7356
7357 return ParseStatus::Success;
7358}
7359
7360ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7361 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7362 AMDGPUOperand::ImmTy Type) {
7363 SMLoc S = getLoc();
7364 int64_t IntVal;
7365
7366 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7367 if (Res.isSuccess())
7368 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7369
7370 return Res;
7371}
7372
7373//===----------------------------------------------------------------------===//
7374// MTBUF format
7375//===----------------------------------------------------------------------===//
7376
7377bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7378 int64_t MaxVal,
7379 int64_t &Fmt) {
7380 int64_t Val;
7381 SMLoc Loc = getLoc();
7382
7383 auto Res = parseIntWithPrefix(Pref, Val);
7384 if (Res.isFailure())
7385 return false;
7386 if (Res.isNoMatch())
7387 return true;
7388
7389 if (Val < 0 || Val > MaxVal) {
7390 Error(Loc, Twine("out of range ", StringRef(Pref)));
7391 return false;
7392 }
7393
7394 Fmt = Val;
7395 return true;
7396}
7397
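// index_key:<n> selects a sub-element of the index operand; 8-bit keys
// accept values 0-3, while 16-bit and 32-bit keys accept only 0 or 1.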
7398ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7399 AMDGPUOperand::ImmTy ImmTy) {
7400 const char *Pref = "index_key";
7401 int64_t ImmVal = 0;
7402 SMLoc Loc = getLoc();
7403 auto Res = parseIntWithPrefix(Pref, ImmVal);
7404 if (!Res.isSuccess())
7405 return Res;
7406
7407 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7408 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7409 (ImmVal < 0 || ImmVal > 1))
7410 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7411
7412 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7413 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7414
7415 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7416 return ParseStatus::Success;
7417}
7418
7419ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7420 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7421}
7422
7423ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7424 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7425}
7426
7427ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7428 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7429}
7430
7431ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7432 StringRef Name,
7433 AMDGPUOperand::ImmTy Type) {
7434 return parseStringOrIntWithPrefix(Operands, Name,
7435 {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
7436 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
7437 "MATRIX_FMT_FP4"},
7438 Type);
7439}
7440
7441ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7442 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7443 AMDGPUOperand::ImmTyMatrixAFMT);
7444}
7445
7446ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7447 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7448 AMDGPUOperand::ImmTyMatrixBFMT);
7449}
7450
7451ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7452 StringRef Name,
7453 AMDGPUOperand::ImmTy Type) {
7454 return parseStringOrIntWithPrefix(
7455 Operands, Name, {"MATRIX_SCALE_ROW0", "MATRIX_SCALE_ROW1"}, Type);
7456}
7457
7458ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7459 return tryParseMatrixScale(Operands, "matrix_a_scale",
7460 AMDGPUOperand::ImmTyMatrixAScale);
7461}
7462
7463ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7464 return tryParseMatrixScale(Operands, "matrix_b_scale",
7465 AMDGPUOperand::ImmTyMatrixBScale);
7466}
7467
7468ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7469 StringRef Name,
7470 AMDGPUOperand::ImmTy Type) {
7471 return parseStringOrIntWithPrefix(
7472 Operands, Name,
7473 {"MATRIX_SCALE_FMT_E8", "MATRIX_SCALE_FMT_E5M3", "MATRIX_SCALE_FMT_E4M3"},
7474 Type);
7475}
7476
7477ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7478 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7479 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7480}
7481
7482ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7483 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7484 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7485}
7486
7487// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7488// values to live in a joint format operand in the MCInst encoding.
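// For example, "dfmt:5, nfmt:2" and "nfmt:2, dfmt:5" produce the same joint
// encoding; either half may be omitted and then takes its default value.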
7489ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7490 using namespace llvm::AMDGPU::MTBUFFormat;
7491
7492 int64_t Dfmt = DFMT_UNDEF;
7493 int64_t Nfmt = NFMT_UNDEF;
7494
7495 // dfmt and nfmt can appear in either order, and each is optional.
7496 for (int I = 0; I < 2; ++I) {
7497 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7498 return ParseStatus::Failure;
7499
7500 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7501 return ParseStatus::Failure;
7502
7503 // Skip optional comma between dfmt/nfmt
7504 // but guard against 2 commas following each other.
7505 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7506 !peekToken().is(AsmToken::Comma)) {
7507 trySkipToken(AsmToken::Comma);
7508 }
7509 }
7510
7511 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7512 return ParseStatus::NoMatch;
7513
7514 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7515 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7516
7517 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7518 return ParseStatus::Success;
7519}
7520
7521ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7522 using namespace llvm::AMDGPU::MTBUFFormat;
7523
7524 int64_t Fmt = UFMT_UNDEF;
7525
7526 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7527 return ParseStatus::Failure;
7528
7529 if (Fmt == UFMT_UNDEF)
7530 return ParseStatus::NoMatch;
7531
7532 Format = Fmt;
7533 return ParseStatus::Success;
7534}
7535
7536bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7537 int64_t &Nfmt,
7538 StringRef FormatStr,
7539 SMLoc Loc) {
7540 using namespace llvm::AMDGPU::MTBUFFormat;
7541 int64_t Format;
7542
7543 Format = getDfmt(FormatStr);
7544 if (Format != DFMT_UNDEF) {
7545 Dfmt = Format;
7546 return true;
7547 }
7548
7549 Format = getNfmt(FormatStr, getSTI());
7550 if (Format != NFMT_UNDEF) {
7551 Nfmt = Format;
7552 return true;
7553 }
7554
7555 Error(Loc, "unsupported format");
7556 return false;
7557}
7558
7559ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7560 SMLoc FormatLoc,
7561 int64_t &Format) {
7562 using namespace llvm::AMDGPU::MTBUFFormat;
7563
7564 int64_t Dfmt = DFMT_UNDEF;
7565 int64_t Nfmt = NFMT_UNDEF;
7566 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7567 return ParseStatus::Failure;
7568
7569 if (trySkipToken(AsmToken::Comma)) {
7570 StringRef Str;
7571 SMLoc Loc = getLoc();
7572 if (!parseId(Str, "expected a format string") ||
7573 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7574 return ParseStatus::Failure;
7575 if (Dfmt == DFMT_UNDEF)
7576 return Error(Loc, "duplicate numeric format");
7577 if (Nfmt == NFMT_UNDEF)
7578 return Error(Loc, "duplicate data format");
7579 }
7580
7581 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7582 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7583
7584 if (isGFX10Plus()) {
7585 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7586 if (Ufmt == UFMT_UNDEF)
7587 return Error(FormatLoc, "unsupported format");
7588 Format = Ufmt;
7589 } else {
7590 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7591 }
7592
7593 return ParseStatus::Success;
7594}
7595
7596ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7597 SMLoc Loc,
7598 int64_t &Format) {
7599 using namespace llvm::AMDGPU::MTBUFFormat;
7600
7601 auto Id = getUnifiedFormat(FormatStr, getSTI());
7602 if (Id == UFMT_UNDEF)
7603 return ParseStatus::NoMatch;
7604
7605 if (!isGFX10Plus())
7606 return Error(Loc, "unified format is not supported on this GPU");
7607
7608 Format = Id;
7609 return ParseStatus::Success;
7610}
7611
7612ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7613 using namespace llvm::AMDGPU::MTBUFFormat;
7614 SMLoc Loc = getLoc();
7615
7616 if (!parseExpr(Format))
7617 return ParseStatus::Failure;
7618 if (!isValidFormatEncoding(Format, getSTI()))
7619 return Error(Loc, "out of range format");
7620
7621 return ParseStatus::Success;
7622}
7623
7624ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7625 using namespace llvm::AMDGPU::MTBUFFormat;
7626
7627 if (!trySkipId("format", AsmToken::Colon))
7628 return ParseStatus::NoMatch;
7629
7630 if (trySkipToken(AsmToken::LBrac)) {
7631 StringRef FormatStr;
7632 SMLoc Loc = getLoc();
7633 if (!parseId(FormatStr, "expected a format string"))
7634 return ParseStatus::Failure;
7635
7636 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7637 if (Res.isNoMatch())
7638 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7639 if (!Res.isSuccess())
7640 return Res;
7641
7642 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7643 return ParseStatus::Failure;
7644
7645 return ParseStatus::Success;
7646 }
7647
7648 return parseNumericFormat(Format);
7649}
7650
7651ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7652 using namespace llvm::AMDGPU::MTBUFFormat;
7653
7654 int64_t Format = getDefaultFormatEncoding(getSTI());
7655 ParseStatus Res;
7656 SMLoc Loc = getLoc();
7657
7658 // Parse legacy format syntax.
7659 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7660 if (Res.isFailure())
7661 return Res;
7662
7663 bool FormatFound = Res.isSuccess();
7664
7665 Operands.push_back(
7666 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7667
7668 if (FormatFound)
7669 trySkipToken(AsmToken::Comma);
7670
7671 if (isToken(AsmToken::EndOfStatement)) {
7672 // We are expecting an soffset operand,
7673 // but let the matcher handle the error.
7674 return ParseStatus::Success;
7675 }
7676
7677 // Parse soffset.
7678 Res = parseRegOrImm(Operands);
7679 if (!Res.isSuccess())
7680 return Res;
7681
7682 trySkipToken(AsmToken::Comma);
7683
7684 if (!FormatFound) {
7685 Res = parseSymbolicOrNumericFormat(Format);
7686 if (Res.isFailure())
7687 return Res;
7688 if (Res.isSuccess()) {
7689 auto Size = Operands.size();
7690 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7691 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7692 Op.setImm(Format);
7693 }
7694 return ParseStatus::Success;
7695 }
7696
7697 if (isId("format") && peekToken().is(AsmToken::Colon))
7698 return Error(getLoc(), "duplicate format");
7699 return ParseStatus::Success;
7700}
7701
7702ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7703 ParseStatus Res =
7704 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7705 if (Res.isNoMatch()) {
7706 Res = parseIntWithPrefix("inst_offset", Operands,
7707 AMDGPUOperand::ImmTyInstOffset);
7708 }
7709 return Res;
7710}
7711
7712ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7713 ParseStatus Res =
7714 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7715 if (Res.isNoMatch())
7716 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7717 return Res;
7718}
7719
7720ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7721 ParseStatus Res =
7722 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7723 if (Res.isNoMatch()) {
7724 Res =
7725 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7726 }
7727 return Res;
7728}
7729
7730//===----------------------------------------------------------------------===//
7731// Exp
7732//===----------------------------------------------------------------------===//
7733
7734void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7735 OptionalImmIndexMap OptionalIdx;
7736
7737 unsigned OperandIdx[4];
7738 unsigned EnMask = 0;
7739 int SrcIdx = 0;
7740
7741 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7742 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7743
7744 // Add the register arguments
7745 if (Op.isReg()) {
7746 assert(SrcIdx < 4);
7747 OperandIdx[SrcIdx] = Inst.size();
7748 Op.addRegOperands(Inst, 1);
7749 ++SrcIdx;
7750 continue;
7751 }
7752
7753 if (Op.isOff()) {
7754 assert(SrcIdx < 4);
7755 OperandIdx[SrcIdx] = Inst.size();
7756 Inst.addOperand(MCOperand::createReg(MCRegister()));
7757 ++SrcIdx;
7758 continue;
7759 }
7760
7761 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7762 Op.addImmOperands(Inst, 1);
7763 continue;
7764 }
7765
7766 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7767 continue;
7768
7769 // Handle optional arguments
7770 OptionalIdx[Op.getImmTy()] = i;
7771 }
7772
7773 assert(SrcIdx == 4);
7774
7775 bool Compr = false;
7776 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7777 Compr = true;
7778 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7779 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7780 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7781 }
7782
7783 for (auto i = 0; i < SrcIdx; ++i) {
7784 if (Inst.getOperand(OperandIdx[i]).getReg()) {
7785 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7786 }
7787 }
7788
7789 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7790 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7791
7792 Inst.addOperand(MCOperand::createImm(EnMask));
7793}
7794
7795//===----------------------------------------------------------------------===//
7796// s_waitcnt
7797//===----------------------------------------------------------------------===//
7798
7799static bool
7800encodeCnt(
7801 const AMDGPU::IsaVersion ISA,
7802 int64_t &IntVal,
7803 int64_t CntVal,
7804 bool Saturate,
7805 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7806 unsigned (*decode)(const IsaVersion &Version, unsigned))
7807{
7808 bool Failed = false;
7809
7810 IntVal = encode(ISA, IntVal, CntVal);
7811 if (CntVal != decode(ISA, IntVal)) {
7812 if (Saturate) {
7813 IntVal = encode(ISA, IntVal, -1);
7814 } else {
7815 Failed = true;
7816 }
7817 }
7818 return Failed;
7819}
7820
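// Parses one counter term of a legacy s_waitcnt operand, e.g. the "vmcnt(0)"
// in "s_waitcnt vmcnt(0) & lgkmcnt(0)". The "_sat" suffixed forms clamp an
// out-of-range count to its maximum encodable value instead of failing.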
7821bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7822
7823 SMLoc CntLoc = getLoc();
7824 StringRef CntName = getTokenStr();
7825
7826 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7827 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7828 return false;
7829
7830 int64_t CntVal;
7831 SMLoc ValLoc = getLoc();
7832 if (!parseExpr(CntVal))
7833 return false;
7834
7835 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7836
7837 bool Failed = true;
7838 bool Sat = CntName.ends_with("_sat");
7839
7840 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7841 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7842 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7843 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7844 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7845 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7846 } else {
7847 Error(CntLoc, "invalid counter name " + CntName);
7848 return false;
7849 }
7850
7851 if (Failed) {
7852 Error(ValLoc, "too large value for " + CntName);
7853 return false;
7854 }
7855
7856 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7857 return false;
7858
7859 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7860 if (isToken(AsmToken::EndOfStatement)) {
7861 Error(getLoc(), "expected a counter name");
7862 return false;
7863 }
7864 }
7865
7866 return true;
7867}
7868
7869ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7870 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7871 int64_t Waitcnt = getWaitcntBitMask(ISA);
7872 SMLoc S = getLoc();
7873
7874 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7875 while (!isToken(AsmToken::EndOfStatement)) {
7876 if (!parseCnt(Waitcnt))
7877 return ParseStatus::Failure;
7878 }
7879 } else {
7880 if (!parseExpr(Waitcnt))
7881 return ParseStatus::Failure;
7882 }
7883
7884 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7885 return ParseStatus::Success;
7886}
7887
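// Parses one field of an s_delay_alu operand, e.g. "instid0(VALU_DEP_1)" in
// "s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)".
// instid0 occupies bits [3:0], instskip bits [6:4], and instid1 bits [10:7].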
7888bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7889 SMLoc FieldLoc = getLoc();
7890 StringRef FieldName = getTokenStr();
7891 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7892 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7893 return false;
7894
7895 SMLoc ValueLoc = getLoc();
7896 StringRef ValueName = getTokenStr();
7897 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7898 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7899 return false;
7900
7901 unsigned Shift;
7902 if (FieldName == "instid0") {
7903 Shift = 0;
7904 } else if (FieldName == "instskip") {
7905 Shift = 4;
7906 } else if (FieldName == "instid1") {
7907 Shift = 7;
7908 } else {
7909 Error(FieldLoc, "invalid field name " + FieldName);
7910 return false;
7911 }
7912
7913 int Value;
7914 if (Shift == 4) {
7915 // Parse values for instskip.
7916 Value = StringSwitch<int>(ValueName)
7917 .Case("SAME", 0)
7918 .Case("NEXT", 1)
7919 .Case("SKIP_1", 2)
7920 .Case("SKIP_2", 3)
7921 .Case("SKIP_3", 4)
7922 .Case("SKIP_4", 5)
7923 .Default(-1);
7924 } else {
7925 // Parse values for instid0 and instid1.
7926 Value = StringSwitch<int>(ValueName)
7927 .Case("NO_DEP", 0)
7928 .Case("VALU_DEP_1", 1)
7929 .Case("VALU_DEP_2", 2)
7930 .Case("VALU_DEP_3", 3)
7931 .Case("VALU_DEP_4", 4)
7932 .Case("TRANS32_DEP_1", 5)
7933 .Case("TRANS32_DEP_2", 6)
7934 .Case("TRANS32_DEP_3", 7)
7935 .Case("FMA_ACCUM_CYCLE_1", 8)
7936 .Case("SALU_CYCLE_1", 9)
7937 .Case("SALU_CYCLE_2", 10)
7938 .Case("SALU_CYCLE_3", 11)
7939 .Default(-1);
7940 }
7941 if (Value < 0) {
7942 Error(ValueLoc, "invalid value name " + ValueName);
7943 return false;
7944 }
7945
7946 Delay |= Value << Shift;
7947 return true;
7948}
7949
7950ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7951 int64_t Delay = 0;
7952 SMLoc S = getLoc();
7953
7954 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7955 do {
7956 if (!parseDelay(Delay))
7957 return ParseStatus::Failure;
7958 } while (trySkipToken(AsmToken::Pipe));
7959 } else {
7960 if (!parseExpr(Delay))
7961 return ParseStatus::Failure;
7962 }
7963
7964 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7965 return ParseStatus::Success;
7966}
7967
7968bool
7969AMDGPUOperand::isSWaitCnt() const {
7970 return isImm();
7971}
7972
7973bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7974
7975//===----------------------------------------------------------------------===//
7976// DepCtr
7977//===----------------------------------------------------------------------===//
7978
7979void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7980 StringRef DepCtrName) {
7981 switch (ErrorId) {
7982 case OPR_ID_UNKNOWN:
7983 Error(Loc, Twine("invalid counter name ", DepCtrName));
7984 return;
7985 case OPR_ID_UNSUPPORTED:
7986 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7987 return;
7988 case OPR_ID_DUPLICATE:
7989 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7990 return;
7991 case OPR_VAL_INVALID:
7992 Error(Loc, Twine("invalid value for ", DepCtrName));
7993 return;
7994 default:
7995 assert(false);
7996 }
7997}
7998
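// Parses one "<counter>(<value>)" term of an s_waitcnt_depctr operand; terms
// may be joined with '&' or ','. Counter names and their field positions are
// target-dependent and are resolved by encodeDepCtr().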
7999bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
8000
8001 using namespace llvm::AMDGPU::DepCtr;
8002
8003 SMLoc DepCtrLoc = getLoc();
8004 StringRef DepCtrName = getTokenStr();
8005
8006 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
8007 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8008 return false;
8009
8010 int64_t ExprVal;
8011 if (!parseExpr(ExprVal))
8012 return false;
8013
8014 unsigned PrevOprMask = UsedOprMask;
8015 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
8016
8017 if (CntVal < 0) {
8018 depCtrError(DepCtrLoc, CntVal, DepCtrName);
8019 return false;
8020 }
8021
8022 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8023 return false;
8024
8025 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
8026 if (isToken(AsmToken::EndOfStatement)) {
8027 Error(getLoc(), "expected a counter name");
8028 return false;
8029 }
8030 }
8031
8032 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8033 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8034 return true;
8035}
8036
8037ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8038 using namespace llvm::AMDGPU::DepCtr;
8039
8040 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
8041 SMLoc Loc = getLoc();
8042
8043 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8044 unsigned UsedOprMask = 0;
8045 while (!isToken(AsmToken::EndOfStatement)) {
8046 if (!parseDepCtr(DepCtr, UsedOprMask))
8047 return ParseStatus::Failure;
8048 }
8049 } else {
8050 if (!parseExpr(DepCtr))
8051 return ParseStatus::Failure;
8052 }
8053
8054 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8055 return ParseStatus::Success;
8056}
8057
8058bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8059
8060//===----------------------------------------------------------------------===//
8061// hwreg
8062//===----------------------------------------------------------------------===//
8063
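// The s_getreg/s_setreg operand may be written as a "hwreg(...)" macro
// (e.g. "hwreg(HW_REG_MODE, 0, 32)"), as a structured immediate such as
// "{id: 1, offset: 0, size: 32}", or as a plain 16-bit literal.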
8064ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8065 OperandInfoTy &Offset,
8066 OperandInfoTy &Width) {
8067 using namespace llvm::AMDGPU::Hwreg;
8068
8069 if (!trySkipId("hwreg", AsmToken::LParen))
8070 return ParseStatus::NoMatch;
8071
8072 // The register may be specified by name or using a numeric code
8073 HwReg.Loc = getLoc();
8074 if (isToken(AsmToken::Identifier) &&
8075 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8076 HwReg.IsSymbolic = true;
8077 lex(); // skip register name
8078 } else if (!parseExpr(HwReg.Val, "a register name")) {
8079 return ParseStatus::Failure;
8080 }
8081
8082 if (trySkipToken(AsmToken::RParen))
8083 return ParseStatus::Success;
8084
8085 // parse optional params
8086 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8087 return ParseStatus::Failure;
8088
8089 Offset.Loc = getLoc();
8090 if (!parseExpr(Offset.Val))
8091 return ParseStatus::Failure;
8092
8093 if (!skipToken(AsmToken::Comma, "expected a comma"))
8094 return ParseStatus::Failure;
8095
8096 Width.Loc = getLoc();
8097 if (!parseExpr(Width.Val) ||
8098 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8099 return ParseStatus::Failure;
8100
8101 return ParseStatus::Success;
8102}
8103
8104ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8105 using namespace llvm::AMDGPU::Hwreg;
8106
8107 int64_t ImmVal = 0;
8108 SMLoc Loc = getLoc();
8109
8110 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8111 HwregId::Default);
8112 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8113 HwregOffset::Default);
8114 struct : StructuredOpField {
8115 using StructuredOpField::StructuredOpField;
8116 bool validate(AMDGPUAsmParser &Parser) const override {
8117 if (!isUIntN(Width, Val - 1))
8118 return Error(Parser, "only values from 1 to 32 are legal");
8119 return true;
8120 }
8121 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8122 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8123
8124 if (Res.isNoMatch())
8125 Res = parseHwregFunc(HwReg, Offset, Width);
8126
8127 if (Res.isSuccess()) {
8128 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8129 return ParseStatus::Failure;
8130 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8131 }
8132
8133 if (Res.isNoMatch() &&
8134 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8135 Res = ParseStatus::Success;
8136
8137 if (!Res.isSuccess())
8138 return ParseStatus::Failure;
8139
8140 if (!isUInt<16>(ImmVal))
8141 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8142 Operands.push_back(
8143 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8144 return ParseStatus::Success;
8145}
8146
8147bool AMDGPUOperand::isHwreg() const {
8148 return isImmTy(ImmTyHwreg);
8149}
8150
8151//===----------------------------------------------------------------------===//
8152// sendmsg
8153//===----------------------------------------------------------------------===//
8154
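// Parses the body of a "sendmsg(...)" macro, e.g. "sendmsg(MSG_INTERRUPT)";
// an operation and a stream id may follow the message name, and whether they
// are required is checked in validateSendMsg() below.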
8155bool
8156AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8157 OperandInfoTy &Op,
8158 OperandInfoTy &Stream) {
8159 using namespace llvm::AMDGPU::SendMsg;
8160
8161 Msg.Loc = getLoc();
8162 if (isToken(AsmToken::Identifier) &&
8163 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8164 Msg.IsSymbolic = true;
8165 lex(); // skip message name
8166 } else if (!parseExpr(Msg.Val, "a message name")) {
8167 return false;
8168 }
8169
8170 if (trySkipToken(AsmToken::Comma)) {
8171 Op.IsDefined = true;
8172 Op.Loc = getLoc();
8173 if (isToken(AsmToken::Identifier) &&
8174 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8175 OPR_ID_UNKNOWN) {
8176 lex(); // skip operation name
8177 } else if (!parseExpr(Op.Val, "an operation name")) {
8178 return false;
8179 }
8180
8181 if (trySkipToken(AsmToken::Comma)) {
8182 Stream.IsDefined = true;
8183 Stream.Loc = getLoc();
8184 if (!parseExpr(Stream.Val))
8185 return false;
8186 }
8187 }
8188
8189 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8190}
8191
8192bool
8193AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8194 const OperandInfoTy &Op,
8195 const OperandInfoTy &Stream) {
8196 using namespace llvm::AMDGPU::SendMsg;
8197
8198 // Validation strictness depends on whether the message is specified
8199 // in a symbolic or in a numeric form. In the latter case
8200 // only the possibility of encoding is checked.
8201 bool Strict = Msg.IsSymbolic;
8202
8203 if (Strict) {
8204 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8205 Error(Msg.Loc, "specified message id is not supported on this GPU");
8206 return false;
8207 }
8208 } else {
8209 if (!isValidMsgId(Msg.Val, getSTI())) {
8210 Error(Msg.Loc, "invalid message id");
8211 return false;
8212 }
8213 }
8214 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8215 if (Op.IsDefined) {
8216 Error(Op.Loc, "message does not support operations");
8217 } else {
8218 Error(Msg.Loc, "missing message operation");
8219 }
8220 return false;
8221 }
8222 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8223 if (Op.Val == OPR_ID_UNSUPPORTED)
8224 Error(Op.Loc, "specified operation id is not supported on this GPU");
8225 else
8226 Error(Op.Loc, "invalid operation id");
8227 return false;
8228 }
8229 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8230 Stream.IsDefined) {
8231 Error(Stream.Loc, "message operation does not support streams");
8232 return false;
8233 }
8234 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8235 Error(Stream.Loc, "invalid message stream id");
8236 return false;
8237 }
8238 return true;
8239}
8240
8241ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8242 using namespace llvm::AMDGPU::SendMsg;
8243
8244 int64_t ImmVal = 0;
8245 SMLoc Loc = getLoc();
8246
8247 if (trySkipId("sendmsg", AsmToken::LParen)) {
8248 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8249 OperandInfoTy Op(OP_NONE_);
8250 OperandInfoTy Stream(STREAM_ID_NONE_);
8251 if (parseSendMsgBody(Msg, Op, Stream) &&
8252 validateSendMsg(Msg, Op, Stream)) {
8253 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8254 } else {
8255 return ParseStatus::Failure;
8256 }
8257 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8258 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8259 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8260 } else {
8261 return ParseStatus::Failure;
8262 }
8263
8264 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8265 return ParseStatus::Success;
8266}
8267
8268bool AMDGPUOperand::isSendMsg() const {
8269 return isImmTy(ImmTySendMsg);
8270}
8271
8272//===----------------------------------------------------------------------===//
8273// v_interp
8274//===----------------------------------------------------------------------===//
8275
8276ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8277 StringRef Str;
8278 SMLoc S = getLoc();
8279
8280 if (!parseId(Str))
8281 return ParseStatus::NoMatch;
8282
8283 int Slot = StringSwitch<int>(Str)
8284 .Case("p10", 0)
8285 .Case("p20", 1)
8286 .Case("p0", 2)
8287 .Default(-1);
8288
8289 if (Slot == -1)
8290 return Error(S, "invalid interpolation slot");
8291
8292 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8293 AMDGPUOperand::ImmTyInterpSlot));
8294 return ParseStatus::Success;
8295}
8296
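// Parses an interpolation attribute of the form "attr<N>.<chan>", e.g.
// "attr3.y" yields attribute 3 and channel 1.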
8297ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8298 StringRef Str;
8299 SMLoc S = getLoc();
8300
8301 if (!parseId(Str))
8302 return ParseStatus::NoMatch;
8303
8304 if (!Str.starts_with("attr"))
8305 return Error(S, "invalid interpolation attribute");
8306
8307 StringRef Chan = Str.take_back(2);
8308 int AttrChan = StringSwitch<int>(Chan)
8309 .Case(".x", 0)
8310 .Case(".y", 1)
8311 .Case(".z", 2)
8312 .Case(".w", 3)
8313 .Default(-1);
8314 if (AttrChan == -1)
8315 return Error(S, "invalid or missing interpolation attribute channel");
8316
8317 Str = Str.drop_back(2).drop_front(4);
8318
8319 uint8_t Attr;
8320 if (Str.getAsInteger(10, Attr))
8321 return Error(S, "invalid or missing interpolation attribute number");
8322
8323 if (Attr > 32)
8324 return Error(S, "out of bounds interpolation attribute number");
8325
8326 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8327
8328 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8329 AMDGPUOperand::ImmTyInterpAttr));
8330 Operands.push_back(AMDGPUOperand::CreateImm(
8331 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8332 return ParseStatus::Success;
8333}
8334
8335//===----------------------------------------------------------------------===//
8336// exp
8337//===----------------------------------------------------------------------===//
8338
8339ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8340 using namespace llvm::AMDGPU::Exp;
8341
8342 StringRef Str;
8343 SMLoc S = getLoc();
8344
8345 if (!parseId(Str))
8346 return ParseStatus::NoMatch;
8347
8348 unsigned Id = getTgtId(Str);
8349 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8350 return Error(S, (Id == ET_INVALID)
8351 ? "invalid exp target"
8352 : "exp target is not supported on this GPU");
8353
8354 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8355 AMDGPUOperand::ImmTyExpTgt));
8356 return ParseStatus::Success;
8357}
8358
8359//===----------------------------------------------------------------------===//
8360// parser helpers
8361//===----------------------------------------------------------------------===//
8362
8363bool
8364AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8365 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8366}
8367
8368bool
8369AMDGPUAsmParser::isId(const StringRef Id) const {
8370 return isId(getToken(), Id);
8371}
8372
8373bool
8374AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8375 return getTokenKind() == Kind;
8376}
8377
8378StringRef AMDGPUAsmParser::getId() const {
8379 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8380}
8381
8382bool
8383AMDGPUAsmParser::trySkipId(const StringRef Id) {
8384 if (isId(Id)) {
8385 lex();
8386 return true;
8387 }
8388 return false;
8389}
8390
8391bool
8392AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8393 if (isToken(AsmToken::Identifier)) {
8394 StringRef Tok = getTokenStr();
8395 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8396 lex();
8397 return true;
8398 }
8399 }
8400 return false;
8401}
8402
8403bool
8404AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8405 if (isId(Id) && peekToken().is(Kind)) {
8406 lex();
8407 lex();
8408 return true;
8409 }
8410 return false;
8411}
8412
8413bool
8414AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8415 if (isToken(Kind)) {
8416 lex();
8417 return true;
8418 }
8419 return false;
8420}
8421
8422bool
8423AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8424 const StringRef ErrMsg) {
8425 if (!trySkipToken(Kind)) {
8426 Error(getLoc(), ErrMsg);
8427 return false;
8428 }
8429 return true;
8430}
8431
8432bool
8433AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8434 SMLoc S = getLoc();
8435
8436 const MCExpr *Expr;
8437 if (Parser.parseExpression(Expr))
8438 return false;
8439
8440 if (Expr->evaluateAsAbsolute(Imm))
8441 return true;
8442
8443 if (Expected.empty()) {
8444 Error(S, "expected absolute expression");
8445 } else {
8446 Error(S, Twine("expected ", Expected) +
8447 Twine(" or an absolute expression"));
8448 }
8449 return false;
8450}
8451
8452bool
8453AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8454 SMLoc S = getLoc();
8455
8456 const MCExpr *Expr;
8457 if (Parser.parseExpression(Expr))
8458 return false;
8459
8460 int64_t IntVal;
8461 if (Expr->evaluateAsAbsolute(IntVal)) {
8462 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8463 } else {
8464 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8465 }
8466 return true;
8467}
8468
8469bool
8470AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8471 if (isToken(AsmToken::String)) {
8472 Val = getToken().getStringContents();
8473 lex();
8474 return true;
8475 }
8476 Error(getLoc(), ErrMsg);
8477 return false;
8478}
8479
8480bool
8481AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8482 if (isToken(AsmToken::Identifier)) {
8483 Val = getTokenStr();
8484 lex();
8485 return true;
8486 }
8487 if (!ErrMsg.empty())
8488 Error(getLoc(), ErrMsg);
8489 return false;
8490}
8491
8492AsmToken
8493AMDGPUAsmParser::getToken() const {
8494 return Parser.getTok();
8495}
8496
8497AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8498 return isToken(AsmToken::EndOfStatement)
8499 ? getToken()
8500 : getLexer().peekTok(ShouldSkipSpace);
8501}
8502
8503void
8504AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8505 auto TokCount = getLexer().peekTokens(Tokens);
8506
8507 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8508 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8509}
8510
8511AsmToken::TokenKind
8512AMDGPUAsmParser::getTokenKind() const {
8513 return getLexer().getKind();
8514}
8515
8516SMLoc
8517AMDGPUAsmParser::getLoc() const {
8518 return getToken().getLoc();
8519}
8520
8521StringRef
8522AMDGPUAsmParser::getTokenStr() const {
8523 return getToken().getString();
8524}
8525
8526void
8527AMDGPUAsmParser::lex() {
8528 Parser.Lex();
8529}
8530
8531SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8532 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8533}
8534
8535SMLoc
8536AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8537 const OperandVector &Operands) const {
8538 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8539 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8540 if (Test(Op))
8541 return Op.getStartLoc();
8542 }
8543 return getInstLoc(Operands);
8544}
8545
8546SMLoc
8547AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8548 const OperandVector &Operands) const {
8549 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8550 return getOperandLoc(Test, Operands);
8551}
8552
8553SMLoc AMDGPUAsmParser::getRegLoc(MCRegister Reg,
8554 const OperandVector &Operands) const {
8555 auto Test = [=](const AMDGPUOperand& Op) {
8556 return Op.isRegKind() && Op.getReg() == Reg;
8557 };
8558 return getOperandLoc(Test, Operands);
8559}
8560
8561SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
8562 bool SearchMandatoryLiterals) const {
8563 auto Test = [](const AMDGPUOperand& Op) {
8564 return Op.IsImmKindLiteral() || Op.isExpr();
8565 };
8566 SMLoc Loc = getOperandLoc(Test, Operands);
8567 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
8568 Loc = getMandatoryLitLoc(Operands);
8569 return Loc;
8570}
8571
8572SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
8573 auto Test = [](const AMDGPUOperand &Op) {
8574 return Op.IsImmKindMandatoryLiteral();
8575 };
8576 return getOperandLoc(Test, Operands);
8577}
8578
8579SMLoc
8580AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
8581 auto Test = [](const AMDGPUOperand& Op) {
8582 return Op.isImmKindConst();
8583 };
8584 return getOperandLoc(Test, Operands);
8585}
8586
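// Parses a brace-enclosed list of "name: value" fields in any order, e.g.
// "{id: 14, offset: 0, size: 4}"; unknown and duplicate field names are
// diagnosed here, while range checks are left to the fields' validate() hooks.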
8587ParseStatus
8588AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8589 if (!trySkipToken(AsmToken::LCurly))
8590 return ParseStatus::NoMatch;
8591
8592 bool First = true;
8593 while (!trySkipToken(AsmToken::RCurly)) {
8594 if (!First &&
8595 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8596 return ParseStatus::Failure;
8597
8598 StringRef Id = getTokenStr();
8599 SMLoc IdLoc = getLoc();
8600 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8601 !skipToken(AsmToken::Colon, "colon expected"))
8602 return ParseStatus::Failure;
8603
8604 const auto *I =
8605 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8606 if (I == Fields.end())
8607 return Error(IdLoc, "unknown field");
8608 if ((*I)->IsDefined)
8609 return Error(IdLoc, "duplicate field");
8610
8611 // TODO: Support symbolic values.
8612 (*I)->Loc = getLoc();
8613 if (!parseExpr((*I)->Val))
8614 return ParseStatus::Failure;
8615 (*I)->IsDefined = true;
8616
8617 First = false;
8618 }
8619 return ParseStatus::Success;
8620}
8621
8622bool AMDGPUAsmParser::validateStructuredOpFields(
8623 ArrayRef<const StructuredOpField *> Fields) {
8624 return all_of(Fields, [this](const StructuredOpField *F) {
8625 return F->validate(*this);
8626 });
8627}
8628
8629//===----------------------------------------------------------------------===//
8630// swizzle
8631//===----------------------------------------------------------------------===//
8632
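// In BITMASK_PERM mode each lane reads from lane ((id & AndMask) | OrMask) ^ XorMask;
// the broadcast, swap and reverse helpers below are expressed via this triple.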
8634static unsigned
8635encodeBitmaskPerm(const unsigned AndMask,
8636 const unsigned OrMask,
8637 const unsigned XorMask) {
8638 using namespace llvm::AMDGPU::Swizzle;
8639
8640 return BITMASK_PERM_ENC |
8641 (AndMask << BITMASK_AND_SHIFT) |
8642 (OrMask << BITMASK_OR_SHIFT) |
8643 (XorMask << BITMASK_XOR_SHIFT);
8644}
8645
8646bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8647 const unsigned MaxVal,
8648 const Twine &ErrMsg, SMLoc &Loc) {
8649 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8650 return false;
8651 }
8652 Loc = getLoc();
8653 if (!parseExpr(Op)) {
8654 return false;
8655 }
8656 if (Op < MinVal || Op > MaxVal) {
8657 Error(Loc, ErrMsg);
8658 return false;
8659 }
8660
8661 return true;
8662}
8663
8664bool
8665AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8666 const unsigned MinVal,
8667 const unsigned MaxVal,
8668 const StringRef ErrMsg) {
8669 SMLoc Loc;
8670 for (unsigned i = 0; i < OpNum; ++i) {
8671 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8672 return false;
8673 }
8674
8675 return true;
8676}
8677
8678bool
8679AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8680 using namespace llvm::AMDGPU::Swizzle;
8681
8682 int64_t Lane[LANE_NUM];
8683 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8684 "expected a 2-bit lane id")) {
8685 Imm = QUAD_PERM_ENC;
8686 for (unsigned I = 0; I < LANE_NUM; ++I) {
8687 Imm |= Lane[I] << (LANE_SHIFT * I);
8688 }
8689 return true;
8690 }
8691 return false;
8692}
8693
8694bool
8695AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8696 using namespace llvm::AMDGPU::Swizzle;
8697
8698 SMLoc Loc;
8699 int64_t GroupSize;
8700 int64_t LaneIdx;
8701
8702 if (!parseSwizzleOperand(GroupSize,
8703 2, 32,
8704 "group size must be in the interval [2,32]",
8705 Loc)) {
8706 return false;
8707 }
8708 if (!isPowerOf2_64(GroupSize)) {
8709 Error(Loc, "group size must be a power of two");
8710 return false;
8711 }
8712 if (parseSwizzleOperand(LaneIdx,
8713 0, GroupSize - 1,
8714 "lane id must be in the interval [0,group size - 1]",
8715 Loc)) {
8716 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8717 return true;
8718 }
8719 return false;
8720}
8721
8722bool
8723AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8724 using namespace llvm::AMDGPU::Swizzle;
8725
8726 SMLoc Loc;
8727 int64_t GroupSize;
8728
8729 if (!parseSwizzleOperand(GroupSize,
8730 2, 32,
8731 "group size must be in the interval [2,32]",
8732 Loc)) {
8733 return false;
8734 }
8735 if (!isPowerOf2_64(GroupSize)) {
8736 Error(Loc, "group size must be a power of two");
8737 return false;
8738 }
8739
8740 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8741 return true;
8742}
8743
8744bool
8745AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8746 using namespace llvm::AMDGPU::Swizzle;
8747
8748 SMLoc Loc;
8749 int64_t GroupSize;
8750
8751 if (!parseSwizzleOperand(GroupSize,
8752 1, 16,
8753 "group size must be in the interval [1,16]",
8754 Loc)) {
8755 return false;
8756 }
8757 if (!isPowerOf2_64(GroupSize)) {
8758 Error(Loc, "group size must be a power of two");
8759 return false;
8760 }
8761
8762 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8763 return true;
8764}
8765
8766bool
8767AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8768 using namespace llvm::AMDGPU::Swizzle;
8769
8770 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8771 return false;
8772 }
8773
8774 StringRef Ctl;
8775 SMLoc StrLoc = getLoc();
8776 if (!parseString(Ctl)) {
8777 return false;
8778 }
8779 if (Ctl.size() != BITMASK_WIDTH) {
8780 Error(StrLoc, "expected a 5-character mask");
8781 return false;
8782 }
8783
8784 unsigned AndMask = 0;
8785 unsigned OrMask = 0;
8786 unsigned XorMask = 0;
8787
8788 for (size_t i = 0; i < Ctl.size(); ++i) {
8789 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8790 switch(Ctl[i]) {
8791 default:
8792 Error(StrLoc, "invalid mask");
8793 return false;
8794 case '0':
8795 break;
8796 case '1':
8797 OrMask |= Mask;
8798 break;
8799 case 'p':
8800 AndMask |= Mask;
8801 break;
8802 case 'i':
8803 AndMask |= Mask;
8804 XorMask |= Mask;
8805 break;
8806 }
8807 }
8808
8809 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8810 return true;
8811}
8812
8813bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8814 using namespace llvm::AMDGPU::Swizzle;
8815
8816 if (!AMDGPU::isGFX9Plus(getSTI())) {
8817 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8818 return false;
8819 }
8820
8821 int64_t Swizzle;
8822 SMLoc Loc;
8823 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8824 "FFT swizzle must be in the interval [0," +
8825 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8826 Loc))
8827 return false;
8828
8829 Imm = FFT_MODE_ENC | Swizzle;
8830 return true;
8831}
8832
8833bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8834 using namespace llvm::AMDGPU::Swizzle;
8835
8836 if (!AMDGPU::isGFX9Plus(getSTI())) {
8837 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8838 return false;
8839 }
8840
8841 SMLoc Loc;
8842 int64_t Direction;
8843
8844 if (!parseSwizzleOperand(Direction, 0, 1,
8845 "direction must be 0 (left) or 1 (right)", Loc))
8846 return false;
8847
8848 int64_t RotateSize;
8849 if (!parseSwizzleOperand(
8850 RotateSize, 0, ROTATE_MAX_SIZE,
8851 "number of threads to rotate must be in the interval [0," +
8852 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8853 Loc))
8854 return false;
8855
8856 Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
8857 (RotateSize << ROTATE_SIZE_SHIFT);
8858 return true;
8859}
8860
8861bool
8862AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8863
8864 SMLoc OffsetLoc = getLoc();
8865
8866 if (!parseExpr(Imm, "a swizzle macro")) {
8867 return false;
8868 }
8869 if (!isUInt<16>(Imm)) {
8870 Error(OffsetLoc, "expected a 16-bit offset");
8871 return false;
8872 }
8873 return true;
8874}
8875
8876bool
8877AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8878 using namespace llvm::AMDGPU::Swizzle;
8879
8880 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
8881
8882 SMLoc ModeLoc = getLoc();
8883 bool Ok = false;
8884
8885 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8886 Ok = parseSwizzleQuadPerm(Imm);
8887 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8888 Ok = parseSwizzleBitmaskPerm(Imm);
8889 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8890 Ok = parseSwizzleBroadcast(Imm);
8891 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8892 Ok = parseSwizzleSwap(Imm);
8893 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8894 Ok = parseSwizzleReverse(Imm);
8895 } else if (trySkipId(IdSymbolic[ID_FFT])) {
8896 Ok = parseSwizzleFFT(Imm);
8897 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8898 Ok = parseSwizzleRotate(Imm);
8899 } else {
8900 Error(ModeLoc, "expected a swizzle mode");
8901 }
8902
8903 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
8904 }
8905
8906 return false;
8907}
8908
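// Parses the ds_swizzle_b32 offset, either as a raw 16-bit value
// ("offset:0x8041") or as a macro, e.g. "offset:swizzle(QUAD_PERM, 0, 1, 2, 3)"
// or "offset:swizzle(BROADCAST, 8, 0)".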
8909ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8910 SMLoc S = getLoc();
8911 int64_t Imm = 0;
8912
8913 if (trySkipId("offset")) {
8914
8915 bool Ok = false;
8916 if (skipToken(AsmToken::Colon, "expected a colon")) {
8917 if (trySkipId("swizzle")) {
8918 Ok = parseSwizzleMacro(Imm);
8919 } else {
8920 Ok = parseSwizzleOffset(Imm);
8921 }
8922 }
8923
8924 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8925
8926 return Ok ? ParseStatus::Success : ParseStatus::Failure;
8927 }
8928 return ParseStatus::NoMatch;
8929}
8930
8931bool
8932AMDGPUOperand::isSwizzle() const {
8933 return isImmTy(ImmTySwizzle);
8934}
8935
8936//===----------------------------------------------------------------------===//
8937// VGPR Index Mode
8938//===----------------------------------------------------------------------===//
8939
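// Parses the mode operand of s_set_gpr_idx_on, either as a numeric mask or
// as a macro such as "gpr_idx(SRC0,DST)"; repeated modes are rejected.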
8940int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8941
8942 using namespace llvm::AMDGPU::VGPRIndexMode;
8943
8944 if (trySkipToken(AsmToken::RParen)) {
8945 return OFF;
8946 }
8947
8948 int64_t Imm = 0;
8949
8950 while (true) {
8951 unsigned Mode = 0;
8952 SMLoc S = getLoc();
8953
8954 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8955 if (trySkipId(IdSymbolic[ModeId])) {
8956 Mode = 1 << ModeId;
8957 break;
8958 }
8959 }
8960
8961 if (Mode == 0) {
8962 Error(S, (Imm == 0)?
8963 "expected a VGPR index mode or a closing parenthesis" :
8964 "expected a VGPR index mode");
8965 return UNDEF;
8966 }
8967
8968 if (Imm & Mode) {
8969 Error(S, "duplicate VGPR index mode");
8970 return UNDEF;
8971 }
8972 Imm |= Mode;
8973
8974 if (trySkipToken(AsmToken::RParen))
8975 break;
8976 if (!skipToken(AsmToken::Comma,
8977 "expected a comma or a closing parenthesis"))
8978 return UNDEF;
8979 }
8980
8981 return Imm;
8982}
8983
8984ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8985
8986 using namespace llvm::AMDGPU::VGPRIndexMode;
8987
8988 int64_t Imm = 0;
8989 SMLoc S = getLoc();
8990
8991 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8992 Imm = parseGPRIdxMacro();
8993 if (Imm == UNDEF)
8994 return ParseStatus::Failure;
8995 } else {
8996 if (getParser().parseAbsoluteExpression(Imm))
8997 return ParseStatus::Failure;
8998 if (Imm < 0 || !isUInt<4>(Imm))
8999 return Error(S, "invalid immediate: only 4-bit values are legal");
9000 }
9001
9002 Operands.push_back(
9003 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
9004 return ParseStatus::Success;
9005}
9006
9007bool AMDGPUOperand::isGPRIdxMode() const {
9008 return isImmTy(ImmTyGprIdxMode);
9009}
9010
9011//===----------------------------------------------------------------------===//
9012// sopp branch targets
9013//===----------------------------------------------------------------------===//
9014
9015ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
9016
9017 // Make sure we are not parsing something
9018 // that looks like a label or an expression but is not.
9019 // This will improve error messages.
9020 if (isRegister() || isModifier())
9021 return ParseStatus::NoMatch;
9022
9023 if (!parseExpr(Operands))
9024 return ParseStatus::Failure;
9025
9026 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
9027 assert(Opr.isImm() || Opr.isExpr());
9028 SMLoc Loc = Opr.getStartLoc();
9029
9030 // Currently we do not support arbitrary expressions as branch targets.
9031 // Only labels and absolute expressions are accepted.
9032 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9033 Error(Loc, "expected an absolute expression or a label");
9034 } else if (Opr.isImm() && !Opr.isS16Imm()) {
9035 Error(Loc, "expected a 16-bit signed jump offset");
9036 }
9037
9038 return ParseStatus::Success;
9039}
9040
9041//===----------------------------------------------------------------------===//
9042// Boolean holding registers
9043//===----------------------------------------------------------------------===//
9044
9045ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
9046 return parseReg(Operands);
9047}
9048
9049//===----------------------------------------------------------------------===//
9050// mubuf
9051//===----------------------------------------------------------------------===//
9052
9053void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9054 const OperandVector &Operands,
9055 bool IsAtomic) {
9056 OptionalImmIndexMap OptionalIdx;
9057 unsigned FirstOperandIdx = 1;
9058 bool IsAtomicReturn = false;
9059
9060 if (IsAtomic) {
9061 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
9062 SIInstrFlags::IsAtomicRet;
9063 }
9064
9065 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9066 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9067
9068 // Add the register arguments
9069 if (Op.isReg()) {
9070 Op.addRegOperands(Inst, 1);
9071 // Insert a tied src for atomic return dst.
9072 // This cannot be postponed as subsequent calls to
9073 // addImmOperands rely on the correct number of MC operands.
9074 if (IsAtomicReturn && i == FirstOperandIdx)
9075 Op.addRegOperands(Inst, 1);
9076 continue;
9077 }
9078
9079 // Handle the case where soffset is an immediate
9080 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9081 Op.addImmOperands(Inst, 1);
9082 continue;
9083 }
9084
9085 // Handle tokens like 'offen' which are sometimes hard-coded into the
9086 // asm string. There are no MCInst operands for these.
9087 if (Op.isToken()) {
9088 continue;
9089 }
9090 assert(Op.isImm());
9091
9092 // Handle optional arguments
9093 OptionalIdx[Op.getImmTy()] = i;
9094 }
9095
9096 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9097 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9098}
9099
9100//===----------------------------------------------------------------------===//
9101// smrd
9102//===----------------------------------------------------------------------===//
9103
9104bool AMDGPUOperand::isSMRDOffset8() const {
9105 return isImmLiteral() && isUInt<8>(getImm());
9106}
9107
9108bool AMDGPUOperand::isSMEMOffset() const {
9109 // Offset range is checked later by validator.
9110 return isImmLiteral();
9111}
9112
9113bool AMDGPUOperand::isSMRDLiteralOffset() const {
9114 // 32-bit literals are only supported on CI and we only want to use them
9115 // when the offset does not fit in 8 bits.
9116 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9117}
9118
9119//===----------------------------------------------------------------------===//
9120// vop3
9121//===----------------------------------------------------------------------===//
9122
9123static bool ConvertOmodMul(int64_t &Mul) {
9124 if (Mul != 1 && Mul != 2 && Mul != 4)
9125 return false;
9126
9127 Mul >>= 1;
9128 return true;
9129}
9130
9131static bool ConvertOmodDiv(int64_t &Div) {
9132 if (Div == 1) {
9133 Div = 0;
9134 return true;
9135 }
9136
9137 if (Div == 2) {
9138 Div = 3;
9139 return true;
9140 }
9141
9142 return false;
9143}
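// Editor's note on the two converters above: the VOP3 output modifier is
// written as mul:2, mul:4 or div:2; ConvertOmodMul maps 1/2/4 to omod values
// 0/1/2 and ConvertOmodDiv maps 1 to 0 and 2 to 3. Illustrative use:
//   v_add_f32 v0, v1, v2 mul:2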
9144
9145// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9146// This is intentional and ensures compatibility with sp3.
9147// See bug 35397 for details.
9148bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9149 if (BoundCtrl == 0 || BoundCtrl == 1) {
9150 if (!isGFX11Plus())
9151 BoundCtrl = 1;
9152 return true;
9153 }
9154 return false;
9155}
9156
9157void AMDGPUAsmParser::onBeginOfFile() {
9158 if (!getParser().getStreamer().getTargetStreamer() ||
9159 getSTI().getTargetTriple().getArch() == Triple::r600)
9160 return;
9161
9162 if (!getTargetStreamer().getTargetID())
9163 getTargetStreamer().initializeTargetID(getSTI(),
9164 getSTI().getFeatureString());
9165
9166 if (isHsaAbi(getSTI()))
9167 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9168}
9169
9170/// Parse AMDGPU specific expressions.
9171///
9172/// expr ::= or(expr, ...) |
9173/// max(expr, ...)
9174///
9175bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9176 using AGVK = AMDGPUMCExpr::VariantKind;
9177
9178 if (isToken(AsmToken::Identifier)) {
9179 StringRef TokenId = getTokenStr();
9180 AGVK VK = StringSwitch<AGVK>(TokenId)
9181 .Case("max", AGVK::AGVK_Max)
9182 .Case("or", AGVK::AGVK_Or)
9183 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9184 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9185 .Case("alignto", AGVK::AGVK_AlignTo)
9186 .Case("occupancy", AGVK::AGVK_Occupancy)
9187 .Default(AGVK::AGVK_None);
9188
9189 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9190 SmallVector<const MCExpr *, 4> Exprs;
9191 uint64_t CommaCount = 0;
9192 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9193 lex(); // Eat '('
9194 while (true) {
9195 if (trySkipToken(AsmToken::RParen)) {
9196 if (Exprs.empty()) {
9197 Error(getToken().getLoc(),
9198 "empty " + Twine(TokenId) + " expression");
9199 return true;
9200 }
9201 if (CommaCount + 1 != Exprs.size()) {
9202 Error(getToken().getLoc(),
9203 "mismatch of commas in " + Twine(TokenId) + " expression");
9204 return true;
9205 }
9206 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9207 return false;
9208 }
9209 const MCExpr *Expr;
9210 if (getParser().parseExpression(Expr, EndLoc))
9211 return true;
9212 Exprs.push_back(Expr);
9213 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9214 if (LastTokenWasComma)
9215 CommaCount++;
9216 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9217 Error(getToken().getLoc(),
9218 "unexpected token in " + Twine(TokenId) + " expression");
9219 return true;
9220 }
9221 }
9222 }
9223 }
9224 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9225}
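// Editor's illustration of the AMDGPU-specific expressions parsed above
// (symbol names are hypothetical): the variadic forms may appear wherever an
// MC expression is accepted, e.g.
//   .set total_vgprs, max(kernel_a.num_vgpr, kernel_b.num_vgpr)
//   .set flags, or(flag_lo, flag_hi)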
9226
9227ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9228 StringRef Name = getTokenStr();
9229 if (Name == "mul") {
9230 return parseIntWithPrefix("mul", Operands,
9231 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9232 }
9233
9234 if (Name == "div") {
9235 return parseIntWithPrefix("div", Operands,
9236 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9237 }
9238
9239 return ParseStatus::NoMatch;
9240}
9241
9242// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9243// the number of src operands present, then copies that bit into src0_modifiers.
9244static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9245 int Opc = Inst.getOpcode();
9246 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9247 if (OpSelIdx == -1)
9248 return;
9249
9250 int SrcNum;
9251 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9252 AMDGPU::OpName::src2};
9253 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9254 ++SrcNum)
9255 ;
9256 assert(SrcNum > 0);
9257
9258 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9259
9260 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9261 if (DstIdx == -1)
9262 return;
9263
9264 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9265 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9266 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9267 if (DstOp.isReg() &&
9268 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9269 if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
9270 ModVal |= SISrcMods::DST_OP_SEL;
9271 } else {
9272 if ((OpSel & (1 << SrcNum)) != 0)
9273 ModVal |= SISrcMods::DST_OP_SEL;
9274 }
9275 Inst.getOperand(ModIdx).setImm(ModVal);
9276}
9277
9278void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9279 const OperandVector &Operands) {
9280 cvtVOP3P(Inst, Operands);
9281 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9282}
9283
9284void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9285 OptionalImmIndexMap &OptionalIdx) {
9286 cvtVOP3P(Inst, Operands, OptionalIdx);
9287 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9288}
9289
9290static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9291 return
9292 // 1. This operand is input modifiers
9293 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9294 // 2. This is not last operand
9295 && Desc.NumOperands > (OpNum + 1)
9296 // 3. Next operand is register class
9297 && Desc.operands()[OpNum + 1].RegClass != -1
9298 // 4. Next register is not tied to any other operand
9299 && Desc.getOperandConstraint(OpNum + 1,
9300 MCOI::OperandConstraint::TIED_TO) == -1;
9301}
9302
9303void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9304{
9305 OptionalImmIndexMap OptionalIdx;
9306 unsigned Opc = Inst.getOpcode();
9307
9308 unsigned I = 1;
9309 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9310 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9311 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9312 }
9313
9314 for (unsigned E = Operands.size(); I != E; ++I) {
9315 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9316 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9317 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9318 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9319 Op.isInterpAttrChan()) {
9320 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9321 } else if (Op.isImmModifier()) {
9322 OptionalIdx[Op.getImmTy()] = I;
9323 } else {
9324 llvm_unreachable("unhandled operand type");
9325 }
9326 }
9327
9328 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9329 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9330 AMDGPUOperand::ImmTyHigh);
9331
9332 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9333 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9334 AMDGPUOperand::ImmTyClamp);
9335
9336 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9337 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9338 AMDGPUOperand::ImmTyOModSI);
9339}
9340
9341void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9342{
9343 OptionalImmIndexMap OptionalIdx;
9344 unsigned Opc = Inst.getOpcode();
9345
9346 unsigned I = 1;
9347 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9348 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9349 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9350 }
9351
9352 for (unsigned E = Operands.size(); I != E; ++I) {
9353 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9354 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9355 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9356 } else if (Op.isImmModifier()) {
9357 OptionalIdx[Op.getImmTy()] = I;
9358 } else {
9359 llvm_unreachable("unhandled operand type");
9360 }
9361 }
9362
9363 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9364
9365 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9366 if (OpSelIdx != -1)
9367 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9368
9369 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9370
9371 if (OpSelIdx == -1)
9372 return;
9373
9374 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9375 AMDGPU::OpName::src2};
9376 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9377 AMDGPU::OpName::src1_modifiers,
9378 AMDGPU::OpName::src2_modifiers};
9379
9380 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9381
9382 for (int J = 0; J < 3; ++J) {
9383 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9384 if (OpIdx == -1)
9385 break;
9386
9387 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9388 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9389
9390 if ((OpSel & (1 << J)) != 0)
9391 ModVal |= SISrcMods::OP_SEL_0;
9392 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
9393 (OpSel & (1 << 3)) != 0)
9394 ModVal |= SISrcMods::DST_OP_SEL;
9395
9396 Inst.getOperand(ModIdx).setImm(ModVal);
9397 }
9398}
9399void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9400 const OperandVector &Operands) {
9401 OptionalImmIndexMap OptionalIdx;
9402 unsigned Opc = Inst.getOpcode();
9403 unsigned I = 1;
9404 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9405
9406 const MCInstrDesc &Desc = MII.get(Opc);
9407
9408 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9409 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9410
9411 for (unsigned E = Operands.size(); I != E; ++I) {
9412 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9413 int NumOperands = Inst.getNumOperands();
9414 // The order of operands in MCInst and parsed operands are different.
9415 // Adding dummy cbsz and blgp operands at corresponding MCInst operand
9416 // indices for parsing scale values correctly.
9417 if (NumOperands == CbszOpIdx) {
9418 Inst.addOperand(MCOperand::createImm(0)); // placeholder for cbsz
9419 Inst.addOperand(MCOperand::createImm(0)); // placeholder for blgp
9420 }
9421 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9422 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9423 } else if (Op.isImmModifier()) {
9424 OptionalIdx[Op.getImmTy()] = I;
9425 } else {
9426 Op.addRegOrImmOperands(Inst, 1);
9427 }
9428 }
9429
9430 // Insert CBSZ and BLGP operands for F8F6F4 variants
9431 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9432 if (CbszIdx != OptionalIdx.end()) {
9433 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9434 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9435 }
9436
9437 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9438 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9439 if (BlgpIdx != OptionalIdx.end()) {
9440 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9441 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9442 }
9443
9444 // Add dummy src_modifiers
9445 Inst.addOperand(MCOperand::createImm(0));
9446 Inst.addOperand(MCOperand::createImm(0));
9447
9448 // Handle op_sel fields
9449
9450 unsigned OpSel = 0;
9451 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9452 if (OpselIdx != OptionalIdx.end()) {
9453 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9454 .getImm();
9455 }
9456
9457 unsigned OpSelHi = 0;
9458 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9459 if (OpselHiIdx != OptionalIdx.end()) {
9460 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9461 .getImm();
9462 }
9463 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9464 AMDGPU::OpName::src1_modifiers};
9465
9466 for (unsigned J = 0; J < 2; ++J) {
9467 unsigned ModVal = 0;
9468 if (OpSel & (1 << J))
9469 ModVal |= SISrcMods::OP_SEL_0;
9470 if (OpSelHi & (1 << J))
9471 ModVal |= SISrcMods::OP_SEL_1;
9472
9473 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9474 Inst.getOperand(ModIdx).setImm(ModVal);
9475 }
9476}
9477
9478void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9479 OptionalImmIndexMap &OptionalIdx) {
9480 unsigned Opc = Inst.getOpcode();
9481
9482 unsigned I = 1;
9483 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9484 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9485 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9486 }
9487
9488 for (unsigned E = Operands.size(); I != E; ++I) {
9489 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9490 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9491 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9492 } else if (Op.isImmModifier()) {
9493 OptionalIdx[Op.getImmTy()] = I;
9494 } else {
9495 Op.addRegOrImmOperands(Inst, 1);
9496 }
9497 }
9498
9499 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9500 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9501 AMDGPUOperand::ImmTyScaleSel);
9502
9503 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9504 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9505 AMDGPUOperand::ImmTyClamp);
9506
9507 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9508 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9509 Inst.addOperand(Inst.getOperand(0));
9510 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9511 AMDGPUOperand::ImmTyByteSel);
9512 }
9513
9514 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9515 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9516 AMDGPUOperand::ImmTyOModSI);
9517
9518 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9519 // they have a src2 register operand that is tied to the dst operand.
9520 // We don't allow modifiers for this operand in the assembler, so
9521 // src2_modifiers should be 0.
9522 if (isMAC(Opc)) {
9523 auto *it = Inst.begin();
9524 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9525 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9526 ++it;
9527 // Copy the operand to ensure it's not invalidated when Inst grows.
9528 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9529 }
9530}
9531
9532void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9533 OptionalImmIndexMap OptionalIdx;
9534 cvtVOP3(Inst, Operands, OptionalIdx);
9535}
9536
9537void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9538 OptionalImmIndexMap &OptIdx) {
9539 const int Opc = Inst.getOpcode();
9540 const MCInstrDesc &Desc = MII.get(Opc);
9541
9542 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9543
9544 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9545 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9546 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9547 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9548 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9549 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9550 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9551 Inst.addOperand(Inst.getOperand(0));
9552 }
9553
9554 // Adding vdst_in operand is already covered for these DPP instructions in
9555 // cvtVOP3DPP.
9556 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
9557 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
9558 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
9559 Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
9560 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
9561 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
9562 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
9563 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
9564 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
9565 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
9566 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9567 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
9568 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
9569 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9570 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9571 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
9572 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
9573 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
9574 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
9575 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
9576 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
9577 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
9578 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
9579 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
9580 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
9581 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
9582 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
9583 Inst.addOperand(Inst.getOperand(0));
9584 }
9585
9586 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9587 if (BitOp3Idx != -1) {
9588 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9589 }
9590
9591 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9592 // instruction, and then figure out where to actually put the modifiers
9593
9594 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9595 if (OpSelIdx != -1) {
9596 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9597 }
9598
9599 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9600 if (OpSelHiIdx != -1) {
9601 int DefaultVal = IsPacked ? -1 : 0;
9602 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9603 DefaultVal);
9604 }
9605
9606 int MatrixAFMTIdx =
9607 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9608 if (MatrixAFMTIdx != -1) {
9609 addOptionalImmOperand(Inst, Operands, OptIdx,
9610 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9611 }
9612
9613 int MatrixBFMTIdx =
9614 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9615 if (MatrixBFMTIdx != -1) {
9616 addOptionalImmOperand(Inst, Operands, OptIdx,
9617 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9618 }
9619
9620 int MatrixAScaleIdx =
9621 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9622 if (MatrixAScaleIdx != -1) {
9623 addOptionalImmOperand(Inst, Operands, OptIdx,
9624 AMDGPUOperand::ImmTyMatrixAScale, 0);
9625 }
9626
9627 int MatrixBScaleIdx =
9628 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9629 if (MatrixBScaleIdx != -1) {
9630 addOptionalImmOperand(Inst, Operands, OptIdx,
9631 AMDGPUOperand::ImmTyMatrixBScale, 0);
9632 }
9633
9634 int MatrixAScaleFmtIdx =
9635 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9636 if (MatrixAScaleFmtIdx != -1) {
9637 addOptionalImmOperand(Inst, Operands, OptIdx,
9638 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9639 }
9640
9641 int MatrixBScaleFmtIdx =
9642 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9643 if (MatrixBScaleFmtIdx != -1) {
9644 addOptionalImmOperand(Inst, Operands, OptIdx,
9645 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9646 }
9647
9648 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9649 addOptionalImmOperand(Inst, Operands, OptIdx,
9650 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9651
9652 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9653 addOptionalImmOperand(Inst, Operands, OptIdx,
9654 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9655
9656 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9657 if (NegLoIdx != -1)
9658 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9659
9660 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9661 if (NegHiIdx != -1)
9662 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9663
9664 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9665 AMDGPU::OpName::src2};
9666 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9667 AMDGPU::OpName::src1_modifiers,
9668 AMDGPU::OpName::src2_modifiers};
9669
9670 unsigned OpSel = 0;
9671 unsigned OpSelHi = 0;
9672 unsigned NegLo = 0;
9673 unsigned NegHi = 0;
9674
9675 if (OpSelIdx != -1)
9676 OpSel = Inst.getOperand(OpSelIdx).getImm();
9677
9678 if (OpSelHiIdx != -1)
9679 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9680
9681 if (NegLoIdx != -1)
9682 NegLo = Inst.getOperand(NegLoIdx).getImm();
9683
9684 if (NegHiIdx != -1)
9685 NegHi = Inst.getOperand(NegHiIdx).getImm();
9686
9687 for (int J = 0; J < 3; ++J) {
9688 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9689 if (OpIdx == -1)
9690 break;
9691
9692 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9693
9694 if (ModIdx == -1)
9695 continue;
9696
9697 uint32_t ModVal = 0;
9698
9699 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9700 if (SrcOp.isReg() && getMRI()
9701 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9702 .contains(SrcOp.getReg())) {
9703 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9704 if (VGPRSuffixIsHi)
9705 ModVal |= SISrcMods::OP_SEL_0;
9706 } else {
9707 if ((OpSel & (1 << J)) != 0)
9708 ModVal |= SISrcMods::OP_SEL_0;
9709 }
9710
9711 if ((OpSelHi & (1 << J)) != 0)
9712 ModVal |= SISrcMods::OP_SEL_1;
9713
9714 if ((NegLo & (1 << J)) != 0)
9715 ModVal |= SISrcMods::NEG;
9716
9717 if ((NegHi & (1 << J)) != 0)
9718 ModVal |= SISrcMods::NEG_HI;
9719
9720 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9721 }
9722}
9723
9724void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9725 OptionalImmIndexMap OptIdx;
9726 cvtVOP3(Inst, Operands, OptIdx);
9727 cvtVOP3P(Inst, Operands, OptIdx);
9728}
9729
9730static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
9731 unsigned i, unsigned Opc,
9732 AMDGPU::OpName OpName) {
9733 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9734 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9735 else
9736 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9737}
9738
9739void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9740 unsigned Opc = Inst.getOpcode();
9741
9742 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9743 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9744 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9745 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9746 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9747
9748 OptionalImmIndexMap OptIdx;
9749 for (unsigned i = 5; i < Operands.size(); ++i) {
9750 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9751 OptIdx[Op.getImmTy()] = i;
9752 }
9753
9754 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9755 addOptionalImmOperand(Inst, Operands, OptIdx,
9756 AMDGPUOperand::ImmTyIndexKey8bit);
9757
9758 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9759 addOptionalImmOperand(Inst, Operands, OptIdx,
9760 AMDGPUOperand::ImmTyIndexKey16bit);
9761
9762 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9763 addOptionalImmOperand(Inst, Operands, OptIdx,
9764 AMDGPUOperand::ImmTyIndexKey32bit);
9765
9766 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9767 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9768
9769 cvtVOP3P(Inst, Operands, OptIdx);
9770}
9771
9772//===----------------------------------------------------------------------===//
9773// VOPD
9774//===----------------------------------------------------------------------===//
9775
9776ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9777 if (!hasVOPD(getSTI()))
9778 return ParseStatus::NoMatch;
9779
9780 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9781 SMLoc S = getLoc();
9782 lex();
9783 lex();
9784 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9785 SMLoc OpYLoc = getLoc();
9786 StringRef OpYName;
9787 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9788 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9789 return ParseStatus::Success;
9790 }
9791 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9792 }
9793 return ParseStatus::NoMatch;
9794}
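// Editor's illustration (not from the source): VOPD dual-issue syntax uses
// '::' to separate the X and Y components, which parseVOPD() emits as a
// token operand, e.g.
//   v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4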
9795
9796// Create VOPD MCInst operands using parsed assembler operands.
9797void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9798 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9799
9800 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9801 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9802 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9803 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9804 return;
9805 }
9806 if (Op.isReg()) {
9807 Op.addRegOperands(Inst, 1);
9808 return;
9809 }
9810 if (Op.isImm()) {
9811 Op.addImmOperands(Inst, 1);
9812 return;
9813 }
9814 llvm_unreachable("Unhandled operand type in cvtVOPD");
9815 };
9816
9817 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9818
9819 // MCInst operands are ordered as follows:
9820 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9821
9822 for (auto CompIdx : VOPD::COMPONENTS) {
9823 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9824 }
9825
9826 for (auto CompIdx : VOPD::COMPONENTS) {
9827 const auto &CInfo = InstInfo[CompIdx];
9828 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9829 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9830 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9831 if (CInfo.hasSrc2Acc())
9832 addOp(CInfo.getIndexOfDstInParsedOperands());
9833 }
9834
9835 int BitOp3Idx =
9836 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9837 if (BitOp3Idx != -1) {
9838 OptionalImmIndexMap OptIdx;
9839 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9840 if (Op.isImm())
9841 OptIdx[Op.getImmTy()] = Operands.size() - 1;
9842
9843 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9844 }
9845}
9846
9847//===----------------------------------------------------------------------===//
9848// dpp
9849//===----------------------------------------------------------------------===//
9850
9851bool AMDGPUOperand::isDPP8() const {
9852 return isImmTy(ImmTyDPP8);
9853}
9854
9855bool AMDGPUOperand::isDPPCtrl() const {
9856 using namespace AMDGPU::DPP;
9857
9858 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9859 if (result) {
9860 int64_t Imm = getImm();
9861 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9862 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9863 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9864 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9865 (Imm == DppCtrl::WAVE_SHL1) ||
9866 (Imm == DppCtrl::WAVE_ROL1) ||
9867 (Imm == DppCtrl::WAVE_SHR1) ||
9868 (Imm == DppCtrl::WAVE_ROR1) ||
9869 (Imm == DppCtrl::ROW_MIRROR) ||
9870 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9871 (Imm == DppCtrl::BCAST15) ||
9872 (Imm == DppCtrl::BCAST31) ||
9873 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9874 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9875 }
9876 return false;
9877}
9878
9879//===----------------------------------------------------------------------===//
9880// mAI
9881//===----------------------------------------------------------------------===//
9882
9883bool AMDGPUOperand::isBLGP() const {
9884 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9885}
9886
9887bool AMDGPUOperand::isS16Imm() const {
9888 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9889}
9890
9891bool AMDGPUOperand::isU16Imm() const {
9892 return isImmLiteral() && isUInt<16>(getImm());
9893}
9894
9895//===----------------------------------------------------------------------===//
9896// dim
9897//===----------------------------------------------------------------------===//
9898
9899bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9900 // We want to allow "dim:1D" etc.,
9901 // but the initial 1 is tokenized as an integer.
9902 std::string Token;
9903 if (isToken(AsmToken::Integer)) {
9904 SMLoc Loc = getToken().getEndLoc();
9905 Token = std::string(getTokenStr());
9906 lex();
9907 if (getLoc() != Loc)
9908 return false;
9909 }
9910
9911 StringRef Suffix;
9912 if (!parseId(Suffix))
9913 return false;
9914 Token += Suffix;
9915
9916 StringRef DimId = Token;
9917 DimId.consume_front("SQ_RSRC_IMG_");
9918
9919 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9920 if (!DimInfo)
9921 return false;
9922
9923 Encoding = DimInfo->Encoding;
9924 return true;
9925}
9926
9927ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9928 if (!isGFX10Plus())
9929 return ParseStatus::NoMatch;
9930
9931 SMLoc S = getLoc();
9932
9933 if (!trySkipId("dim", AsmToken::Colon))
9934 return ParseStatus::NoMatch;
9935
9936 unsigned Encoding;
9937 SMLoc Loc = getLoc();
9938 if (!parseDimId(Encoding))
9939 return Error(Loc, "invalid dim value");
9940
9941 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9942 AMDGPUOperand::ImmTyDim));
9943 return ParseStatus::Success;
9944}
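// Editor's illustration: the dim value may be written with or without the
// SQ_RSRC_IMG_ prefix, e.g. dim:SQ_RSRC_IMG_2D or the shorthand dim:2D, as
// handled by parseDimId() above.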
9945
9946//===----------------------------------------------------------------------===//
9947// dpp
9948//===----------------------------------------------------------------------===//
9949
9950ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9951 SMLoc S = getLoc();
9952
9953 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9954 return ParseStatus::NoMatch;
9955
9956 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9957
9958 int64_t Sels[8];
9959
9960 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9961 return ParseStatus::Failure;
9962
9963 for (size_t i = 0; i < 8; ++i) {
9964 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9965 return ParseStatus::Failure;
9966
9967 SMLoc Loc = getLoc();
9968 if (getParser().parseAbsoluteExpression(Sels[i]))
9969 return ParseStatus::Failure;
9970 if (0 > Sels[i] || 7 < Sels[i])
9971 return Error(Loc, "expected a 3-bit value");
9972 }
9973
9974 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9975 return ParseStatus::Failure;
9976
9977 unsigned DPP8 = 0;
9978 for (size_t i = 0; i < 8; ++i)
9979 DPP8 |= (Sels[i] << (i * 3));
9980
9981 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9982 return ParseStatus::Success;
9983}
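// Editor's illustration: dpp8 takes eight 3-bit lane selectors, packed three
// bits per lane into the immediate built above, e.g.
//   v_mov_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0]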
9984
9985bool
9986AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9987 const OperandVector &Operands) {
9988 if (Ctrl == "row_newbcast")
9989 return isGFX90A();
9990
9991 if (Ctrl == "row_share" ||
9992 Ctrl == "row_xmask")
9993 return isGFX10Plus();
9994
9995 if (Ctrl == "wave_shl" ||
9996 Ctrl == "wave_shr" ||
9997 Ctrl == "wave_rol" ||
9998 Ctrl == "wave_ror" ||
9999 Ctrl == "row_bcast")
10000 return isVI() || isGFX9();
10001
10002 return Ctrl == "row_mirror" ||
10003 Ctrl == "row_half_mirror" ||
10004 Ctrl == "quad_perm" ||
10005 Ctrl == "row_shl" ||
10006 Ctrl == "row_shr" ||
10007 Ctrl == "row_ror";
10008}
10009
10010int64_t
10011AMDGPUAsmParser::parseDPPCtrlPerm() {
10012 // quad_perm:[%d,%d,%d,%d]
10013
10014 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10015 return -1;
10016
10017 int64_t Val = 0;
10018 for (int i = 0; i < 4; ++i) {
10019 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10020 return -1;
10021
10022 int64_t Temp;
10023 SMLoc Loc = getLoc();
10024 if (getParser().parseAbsoluteExpression(Temp))
10025 return -1;
10026 if (Temp < 0 || Temp > 3) {
10027 Error(Loc, "expected a 2-bit value");
10028 return -1;
10029 }
10030
10031 Val += (Temp << i * 2);
10032 }
10033
10034 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10035 return -1;
10036
10037 return Val;
10038}
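// Editor's illustration: quad_perm takes four 2-bit selectors packed as
// computed above; the identity permutation is quad_perm:[0,1,2,3].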
10039
10040int64_t
10041AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10042 using namespace AMDGPU::DPP;
10043
10044 // sel:%d
10045
10046 int64_t Val;
10047 SMLoc Loc = getLoc();
10048
10049 if (getParser().parseAbsoluteExpression(Val))
10050 return -1;
10051
10052 struct DppCtrlCheck {
10053 int64_t Ctrl;
10054 int Lo;
10055 int Hi;
10056 };
10057
10058 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10059 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10060 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10061 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10062 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10063 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10064 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10065 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10066 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10067 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10068 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10069 .Default({-1, 0, 0});
10070
10071 bool Valid;
10072 if (Check.Ctrl == -1) {
10073 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10074 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10075 } else {
10076 Valid = Check.Lo <= Val && Val <= Check.Hi;
10077 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10078 }
10079
10080 if (!Valid) {
10081 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10082 return -1;
10083 }
10084
10085 return Val;
10086}
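// Editor's illustration of the sel-style controls validated above:
//   row_shl:1 ... row_shl:15, row_share:0 ... row_share:15,
//   row_bcast:15 or row_bcast:31.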
10087
10088ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10089 using namespace AMDGPU::DPP;
10090
10091 if (!isToken(AsmToken::Identifier) ||
10092 !isSupportedDPPCtrl(getTokenStr(), Operands))
10093 return ParseStatus::NoMatch;
10094
10095 SMLoc S = getLoc();
10096 int64_t Val = -1;
10097 StringRef Ctrl;
10098
10099 parseId(Ctrl);
10100
10101 if (Ctrl == "row_mirror") {
10102 Val = DppCtrl::ROW_MIRROR;
10103 } else if (Ctrl == "row_half_mirror") {
10104 Val = DppCtrl::ROW_HALF_MIRROR;
10105 } else {
10106 if (skipToken(AsmToken::Colon, "expected a colon")) {
10107 if (Ctrl == "quad_perm") {
10108 Val = parseDPPCtrlPerm();
10109 } else {
10110 Val = parseDPPCtrlSel(Ctrl);
10111 }
10112 }
10113 }
10114
10115 if (Val == -1)
10116 return ParseStatus::Failure;
10117
10118 Operands.push_back(
10119 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10120 return ParseStatus::Success;
10121}
10122
10123void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10124 bool IsDPP8) {
10125 OptionalImmIndexMap OptionalIdx;
10126 unsigned Opc = Inst.getOpcode();
10127 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10128
10129 // MAC instructions are special because they have an 'old'
10130 // operand which is not tied to dst (but assumed to be).
10131 // They also have dummy unused src2_modifiers.
10132 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10133 int Src2ModIdx =
10134 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10135 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10136 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10137
10138 unsigned I = 1;
10139 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10140 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10141 }
10142
10143 int Fi = 0;
10144 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10145 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10146 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10147 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10148 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
10149
10150 for (unsigned E = Operands.size(); I != E; ++I) {
10151
10152 if (IsMAC) {
10153 int NumOperands = Inst.getNumOperands();
10154 if (OldIdx == NumOperands) {
10155 // Handle old operand
10156 constexpr int DST_IDX = 0;
10157 Inst.addOperand(Inst.getOperand(DST_IDX));
10158 } else if (Src2ModIdx == NumOperands) {
10159 // Add unused dummy src2_modifiers
10160 Inst.addOperand(MCOperand::createImm(0));
10161 }
10162 }
10163
10164 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10165 Inst.addOperand(Inst.getOperand(0));
10166 }
10167
10168 if (IsVOP3CvtSrDpp) {
10169 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10170 Inst.addOperand(MCOperand::createImm(0));
10171 Inst.addOperand(Inst.getOperand(0));
10172 }
10173 }
10174
10175 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10176 MCOI::TIED_TO);
10177 if (TiedTo != -1) {
10178 assert((unsigned)TiedTo < Inst.getNumOperands());
10179 // handle tied old or src2 for MAC instructions
10180 Inst.addOperand(Inst.getOperand(TiedTo));
10181 }
10182 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10183 // Add the register arguments
10184 if (IsDPP8 && Op.isDppFI()) {
10185 Fi = Op.getImm();
10186 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10187 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10188 } else if (Op.isReg()) {
10189 Op.addRegOperands(Inst, 1);
10190 } else if (Op.isImm() &&
10191 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10192 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
10193 Op.addImmOperands(Inst, 1);
10194 } else if (Op.isImm()) {
10195 OptionalIdx[Op.getImmTy()] = I;
10196 } else {
10197 llvm_unreachable("unhandled operand type");
10198 }
10199 }
10200
10201 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10202 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10203 AMDGPUOperand::ImmTyClamp);
10204
10205 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10206 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10207 Inst.addOperand(Inst.getOperand(0));
10208 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10209 AMDGPUOperand::ImmTyByteSel);
10210 }
10211
10212 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10213 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10214
10215 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10216 cvtVOP3P(Inst, Operands, OptionalIdx);
10217 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10218 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10219 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10220 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10221 }
10222
10223 if (IsDPP8) {
10224 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10225 using namespace llvm::AMDGPU::DPP;
10226 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10227 } else {
10228 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10229 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10230 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10231 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10232
10233 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10234 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10235 AMDGPUOperand::ImmTyDppFI);
10236 }
10237}
10238
10239void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10240 OptionalImmIndexMap OptionalIdx;
10241
10242 unsigned I = 1;
10243 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10244 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10245 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10246 }
10247
10248 int Fi = 0;
10249 for (unsigned E = Operands.size(); I != E; ++I) {
10250 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10251 MCOI::TIED_TO);
10252 if (TiedTo != -1) {
10253 assert((unsigned)TiedTo < Inst.getNumOperands());
10254 // handle tied old or src2 for MAC instructions
10255 Inst.addOperand(Inst.getOperand(TiedTo));
10256 }
10257 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10258 // Add the register arguments
10259 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10260 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10261 // Skip it.
10262 continue;
10263 }
10264
10265 if (IsDPP8) {
10266 if (Op.isDPP8()) {
10267 Op.addImmOperands(Inst, 1);
10268 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10269 Op.addRegWithFPInputModsOperands(Inst, 2);
10270 } else if (Op.isDppFI()) {
10271 Fi = Op.getImm();
10272 } else if (Op.isReg()) {
10273 Op.addRegOperands(Inst, 1);
10274 } else {
10275 llvm_unreachable("Invalid operand type");
10276 }
10277 } else {
10278 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10279 Op.addRegWithFPInputModsOperands(Inst, 2);
10280 } else if (Op.isReg()) {
10281 Op.addRegOperands(Inst, 1);
10282 } else if (Op.isDPPCtrl()) {
10283 Op.addImmOperands(Inst, 1);
10284 } else if (Op.isImm()) {
10285 // Handle optional arguments
10286 OptionalIdx[Op.getImmTy()] = I;
10287 } else {
10288 llvm_unreachable("Invalid operand type");
10289 }
10290 }
10291 }
10292
10293 if (IsDPP8) {
10294 using namespace llvm::AMDGPU::DPP;
10295 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10296 } else {
10297 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10298 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10299 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10300 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10301 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10302 AMDGPUOperand::ImmTyDppFI);
10303 }
10304 }
10305}
10306
10307//===----------------------------------------------------------------------===//
10308// sdwa
10309//===----------------------------------------------------------------------===//
10310
10311ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10312 StringRef Prefix,
10313 AMDGPUOperand::ImmTy Type) {
10314 return parseStringOrIntWithPrefix(
10315 Operands, Prefix,
10316 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10317 Type);
10318}
10319
10320ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10321 return parseStringOrIntWithPrefix(
10322 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10323 AMDGPUOperand::ImmTySDWADstUnused);
10324}
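// Editor's illustration (not from the source) of the SDWA selectors parsed by
// the two helpers above, e.g.
//   v_add_f32_sdwa v0, v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:WORD_1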
10325
10326void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10327 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
10328}
10329
10330void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10331 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
10332}
10333
10334void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10335 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
10336}
10337
10338void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10339 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
10340}
10341
10342void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10343 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
10344}
10345
10346void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10347 uint64_t BasicInstType,
10348 bool SkipDstVcc,
10349 bool SkipSrcVcc) {
10350 using namespace llvm::AMDGPU::SDWA;
10351
10352 OptionalImmIndexMap OptionalIdx;
10353 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10354 bool SkippedVcc = false;
10355
10356 unsigned I = 1;
10357 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10358 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10359 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10360 }
10361
10362 for (unsigned E = Operands.size(); I != E; ++I) {
10363 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10364 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10365 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10366 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10367 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10368 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10369 // Skip VCC only if we didn't skip it on previous iteration.
10370 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10371 if (BasicInstType == SIInstrFlags::VOP2 &&
10372 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10373 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10374 SkippedVcc = true;
10375 continue;
10376 }
10377 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10378 SkippedVcc = true;
10379 continue;
10380 }
10381 }
10382 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10383 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10384 } else if (Op.isImm()) {
10385 // Handle optional arguments
10386 OptionalIdx[Op.getImmTy()] = I;
10387 } else {
10388 llvm_unreachable("Invalid operand type");
10389 }
10390 SkippedVcc = false;
10391 }
10392
10393 const unsigned Opc = Inst.getOpcode();
10394 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10395 Opc != AMDGPU::V_NOP_sdwa_vi) {
10396 // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
10397 switch (BasicInstType) {
10398 case SIInstrFlags::VOP1:
10399 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10400 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10401 AMDGPUOperand::ImmTyClamp, 0);
10402
10403 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10404 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10405 AMDGPUOperand::ImmTyOModSI, 0);
10406
10407 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10408 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10409 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10410
10411 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10412 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10413 AMDGPUOperand::ImmTySDWADstUnused,
10414 DstUnused::UNUSED_PRESERVE);
10415
10416 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10417 break;
10418
10419 case SIInstrFlags::VOP2:
10420 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10421 AMDGPUOperand::ImmTyClamp, 0);
10422
10423 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10424 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10425
10426 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10427 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10428 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10429 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10430 break;
10431
10432 case SIInstrFlags::VOPC:
10433 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10434 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10435 AMDGPUOperand::ImmTyClamp, 0);
10436 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10437 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10438 break;
10439
10440 default:
10441 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10442 }
10443 }
10444
10445 // special case v_mac_{f16, f32}:
10446 // it has a src2 register operand that is tied to the dst operand
10447 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10448 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10449 auto *it = Inst.begin();
10450 std::advance(
10451 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10452 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10453 }
10454}
10455
10456/// Force static initialization.
10457 extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10458LLVMInitializeAMDGPUAsmParser() {
10459 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
10460 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
10461}
10462
10463#define GET_REGISTER_MATCHER
10464#define GET_MATCHER_IMPLEMENTATION
10465#define GET_MNEMONIC_SPELL_CHECKER
10466#define GET_MNEMONIC_CHECKER
10467#include "AMDGPUGenAsmMatcher.inc"
10468
10469ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10470 unsigned MCK) {
10471 switch (MCK) {
10472 case MCK_addr64:
10473 return parseTokenOp("addr64", Operands);
10474 case MCK_done:
10475 return parseTokenOp("done", Operands);
10476 case MCK_idxen:
10477 return parseTokenOp("idxen", Operands);
10478 case MCK_lds:
10479 return parseTokenOp("lds", Operands);
10480 case MCK_offen:
10481 return parseTokenOp("offen", Operands);
10482 case MCK_off:
10483 return parseTokenOp("off", Operands);
10484 case MCK_row_95_en:
10485 return parseTokenOp("row_en", Operands);
10486 case MCK_gds:
10487 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10488 case MCK_tfe:
10489 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10490 }
10491 return tryCustomParseOperand(Operands, MCK);
10492}
10493
10494// This function should be defined after the auto-generated include so that
10495// the MatchClassKind enum is defined.
10496unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10497 unsigned Kind) {
10498 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10499 // But MatchInstructionImpl() expects a token and fails to validate the
10500 // operand. This method checks whether we were given an immediate operand but
10501 // the corresponding token is expected.
10502 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10503 switch (Kind) {
10504 case MCK_addr64:
10505 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10506 case MCK_gds:
10507 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10508 case MCK_lds:
10509 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10510 case MCK_idxen:
10511 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10512 case MCK_offen:
10513 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10514 case MCK_tfe:
10515 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10516 case MCK_SSrc_b32:
10517 // When operands have expression values, they will return true for isToken,
10518 // because it is not possible to distinguish between a token and an
10519 // expression at parse time. MatchInstructionImpl() will always try to
10520 // match an operand as a token, when isToken returns true, and when the
10521 // name of the expression is not a valid token, the match will fail,
10522 // so we need to handle it here.
10523 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10524 case MCK_SSrc_f32:
10525 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10526 case MCK_SOPPBrTarget:
10527 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10528 case MCK_VReg32OrOff:
10529 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10530 case MCK_InterpSlot:
10531 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10532 case MCK_InterpAttr:
10533 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10534 case MCK_InterpAttrChan:
10535 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10536 case MCK_SReg_64:
10537 case MCK_SReg_64_XEXEC:
10538 // Null is defined as a 32-bit register but
10539 // it should also be enabled with 64-bit operands or larger.
10540 // The following code enables it for SReg_64 and larger operands
10541 // used as source and destination. Remaining source
10542 // operands are handled in isInlinableImm.
10543 case MCK_SReg_96:
10544 case MCK_SReg_128:
10545 case MCK_SReg_256:
10546 case MCK_SReg_512:
10547 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10548 default:
10549 return Match_InvalidOperand;
10550 }
10551}
10552
10553//===----------------------------------------------------------------------===//
10554// endpgm
10555//===----------------------------------------------------------------------===//
10556
10557ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10558 SMLoc S = getLoc();
10559 int64_t Imm = 0;
10560
10561 if (!parseExpr(Imm)) {
10562 // The operand is optional, if not present default to 0
10563 Imm = 0;
10564 }
10565
10566 if (!isUInt<16>(Imm))
10567 return Error(S, "expected a 16-bit value");
10568
10569 Operands.push_back(
10570 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10571 return ParseStatus::Success;
10572}
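// Editor's illustration: the immediate after s_endpgm is optional and
// defaults to 0, e.g. both 's_endpgm' and 's_endpgm 3' are accepted.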
10573
10574bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10575
10576//===----------------------------------------------------------------------===//
10577// Split Barrier
10578//===----------------------------------------------------------------------===//
10579
10580bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
#define Success
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static int getRegClass(RegisterKind Is, unsigned RegWidth)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDGPU metadata definitions and in-memory representations.
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_READNONE
Definition: Compiler.h:315
#define LLVM_ABI
Definition: Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:132
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
@ Default
Definition: DwarfDebug.cpp:86
std::string Name
uint64_t Size
Symbol * Sym
Definition: ELF_riscv.cpp:479
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static unsigned getOperandSize(MachineInstr &MI, unsigned Idx, MachineRegisterInfo &MRI)
#define Check(C,...)
static llvm::Expected< InlineInfo > decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
Definition: InlineInfo.cpp:179
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
Register const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
Interface definition for SIInstrInfo.
unsigned unsigned DefaultVal
raw_pwrite_stream & OS
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:480
This file implements the SmallBitVector class.
static bool Enabled
Definition: Statistic.cpp:46
StringSet - A set-like wrapper for the StringMap.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:247
BinaryOperator * Mul
support::ulittle16_t & Lo
Definition: aarch32.cpp:205
support::ulittle16_t & Hi
Definition: aarch32.cpp:204
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
Definition: AMDGPUMCExpr.h:79
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
Definition: AMDGPUMCExpr.h:93
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:6057
Class for arbitrary precision integers.
Definition: APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:22
StringRef getString() const
Get the string for the current token; this includes all characters (for example, the quotes on string...
Definition: MCAsmMacro.h:103
bool is(TokenKind K) const
Definition: MCAsmMacro.h:75
TokenKind getKind() const
Definition: MCAsmMacro.h:74
This class represents an Operation in the Expression.
Register getReg() const
Base class for user error types.
Definition: Error.h:354
Lightweight error class with error context and mandatory checking.
Definition: Error.h:159
Tagged union holding either a T or a Error.
Definition: Error.h:485
Class representing an expression and its matching format.
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
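FeatureBitset is indexed by subtarget feature numbers; a minimal usage sketch (the index 5 below is arbitrary and purely illustrative):

llvm::FeatureBitset Bits;
Bits.flip(5);             // turn an arbitrary feature bit on
bool IsOn = Bits.test(5); // true after the flip
Bits.flip(5);             // and back off again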
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:64
void printExpr(raw_ostream &, const MCExpr &) const
Definition: MCAsmInfo.cpp:153
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
Generic assembler parser interface, for use by target specific assembly parsers.
Definition: MCAsmParser.h:124
MCStreamer & getStreamer()
Definition: MCAsmParser.h:165
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition: MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:212
Context object for machine code objects.
Definition: MCContext.h:83
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:414
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Look up the symbol with the specified Name, creating it if it does not already exist.
Definition: MCContext.cpp:203
const MCSubtargetInfo * getSubtargetInfo() const
Definition: MCContext.h:418
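Together, the MCExpr builders and MCContext entries above are what the parser uses to synthesize late-resolved values (for example kernel descriptor fields). A minimal sketch, assuming an MCContext &Ctx is already in scope; the symbol name is purely illustrative:

llvm::MCSymbol *Sym = Ctx.getOrCreateSymbol("illustrative_symbol");
const llvm::MCExpr *Forty = llvm::MCConstantExpr::create(40, Ctx);
const llvm::MCExpr *Two = llvm::MCConstantExpr::create(2, Ctx);
// Bind the symbol to the (lazily evaluated) expression 40 + 2.
Sym->setVariableValue(llvm::MCBinaryExpr::createAdd(Forty, Two, Ctx));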
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:188
unsigned getNumOperands() const
Definition: MCInst.h:212
SMLoc getLoc() const
Definition: MCInst.h:208
void setLoc(SMLoc loc)
Definition: MCInst.h:207
unsigned getOpcode() const
Definition: MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition: MCInst.h:232
void addOperand(const MCOperand Op)
Definition: MCInst.h:215
iterator begin()
Definition: MCInst.h:227
size_t size() const
Definition: MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition: MCInst.h:210
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:199
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:27
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:64
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:40
void setImm(int64_t Val)
Definition: MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:166
int64_t getImm() const
Definition: MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition: MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:145
bool isImm() const
Definition: MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition: MCInst.h:79
bool isReg() const
Definition: MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition: MCInst.h:73
bool isExpr() const
Definition: MCInst.h:69
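A short sketch of assembling an MCInst by hand with the MCOperand factories above; the opcode and register below are placeholders, not a real AMDGPU encoding:

llvm::MCInst Inst;
Inst.setOpcode(0);                                               // placeholder opcode
Inst.addOperand(llvm::MCOperand::createImm(42));                 // immediate operand
Inst.addOperand(llvm::MCOperand::createReg(llvm::MCRegister())); // placeholder register
// Operands are read back positionally.
int64_t Imm = Inst.getOperand(0).isImm() ? Inst.getOperand(0).getImm() : 0; // Imm == 42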
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
virtual bool isReg() const =0
isReg - Is this a register operand?
virtual bool isMem() const =0
isMem - Is this a memory operand?
virtual MCRegister getReg() const =0
virtual bool isToken() const =0
isToken - Is this a token operand?
virtual bool isImm() const =0
isImm - Is this an immediate operand?
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
constexpr bool isValid() const
Definition: MCRegister.h:76
Streaming machine code generation interface.
Definition: MCStreamer.h:220
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
MCTargetStreamer * getTargetStreamer()
Definition: MCStreamer.h:324
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
FeatureBitset ToggleFeature(uint64_t FB)
Toggle a feature and return the re-computed feature bits.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:42
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition: MCSymbol.cpp:50
MCTargetAsmParser - Generic interface to target specific assembly parsers.
virtual bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands)=0
Parse one assembly instruction.
MCSubtargetInfo & copySTI()
Create a copy of STI and return a non-const reference to it.
virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
virtual bool ParseDirective(AsmToken DirectiveID)
ParseDirective - Parse a target specific assembler directive. This method is deprecated,...
virtual ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
tryParseRegister - parse one register if possible
virtual bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm)=0
Recognize a series of operands of a parsed instruction as an actual MCInst and emit it to the specifi...
const MCInstrInfo & MII
void setAvailableFeatures(const FeatureBitset &Value)
const MCSubtargetInfo & getSTI() const
virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind)
Allow a target to add special case operand matching for things that tblgen doesn't/can't handle effec...
virtual unsigned checkTargetMatchPredicate(MCInst &Inst)
checkTargetMatchPredicate - Validate the instruction match against any complex target predicates not ...
Target specific streamer interface.
Definition: MCStreamer.h:93
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:303
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
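ParseStatus is the ternary result used by the parser's parse* routines: NoMatch means the construct was not recognized at all, while Success and Failure report the outcome once parsing was actually attempted (Failure implies a diagnostic was already emitted). A hedged caller sketch; parseSomething is hypothetical:

llvm::ParseStatus Res = parseSomething();
if (Res.isNoMatch()) {
  // Not this operand form; try the next alternative.
} else if (Res.isFailure()) {
  // Parsing was attempted and failed; an error has been reported.
} else if (Res.isSuccess()) {
  // Operand parsed and appended to the operand list.
}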
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Represents a location in source code.
Definition: SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:36
constexpr const char * getPointer() const
Definition: SMLoc.h:34
constexpr bool isValid() const
Definition: SMLoc.h:29
Represents a range in source code.
Definition: SMLoc.h:48
SMLoc Start
Definition: SMLoc.h:50
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:283
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
Register getReg() const
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition: StringRef.h:862
StringMapEntry - This is used to represent one value that is inserted into a StringMap.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition: StringRef.h:665
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:269
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:151
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:619
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:154
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:148
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition: StringRef.h:645
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:281
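The StringRef helpers above are the building blocks for mnemonic and register-name dissection. A minimal illustration with an arbitrary string:

llvm::StringRef Name = "v255_hi";
bool IsVGPR = Name.starts_with("v");        // true
llvm::StringRef Rest = Name;
bool HadPrefix = Rest.consume_front("v");   // true; Rest is now "255_hi"
bool HadSuffix = Rest.consume_back("_hi");  // true; Rest is now "255"
llvm::StringRef Tail = Name.drop_front(1);  // "255_hi"; Name itself is unchanged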
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:25
bool contains(StringRef key) const
Check if the set contains the given key.
Definition: StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:39
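StringSet provides set semantics over owned strings; a short usage sketch with arbitrary keys:

llvm::StringSet<> Known;
Known.insert("s_nop");
bool Seen = Known.contains("s_nop");          // true
bool Inserted = Known.insert("s_nop").second; // false: already present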
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:68
R Default(T Value)
Definition: StringSwitch.h:177
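StringSwitch is the usual way symbolic names are mapped to encodings in assembly parsers; a minimal sketch with made-up cases:

llvm::StringRef Tok = "on";
int Encoding = llvm::StringSwitch<int>(Tok)
                   .Case("off", 0)
                   .Case("on", 1)
                   .Default(-1); // Encoding == 1; -1 would mean unrecognized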
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:75
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:194
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:662
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:692
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
Key
PAL metadata keys.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Returns true if Reg is a scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo register, return the correct hardware register for the given STI; otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
bool hasMAIInsts(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating-point use.
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition: SIDefines.h:231
@ OPERAND_REG_IMM_INT64
Definition: SIDefines.h:202
@ OPERAND_REG_IMM_V2FP16
Definition: SIDefines.h:209
@ OPERAND_REG_INLINE_C_FP64
Definition: SIDefines.h:222
@ OPERAND_REG_INLINE_C_BF16
Definition: SIDefines.h:219
@ OPERAND_REG_INLINE_C_V2BF16
Definition: SIDefines.h:224
@ OPERAND_REG_IMM_V2INT16
Definition: SIDefines.h:210
@ OPERAND_REG_IMM_BF16
Definition: SIDefines.h:206
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition: SIDefines.h:201
@ OPERAND_REG_IMM_V2BF16
Definition: SIDefines.h:208
@ OPERAND_REG_IMM_FP16
Definition: SIDefines.h:207
@ OPERAND_REG_INLINE_C_INT64
Definition: SIDefines.h:218
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition: SIDefines.h:216
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition: SIDefines.h:211
@ OPERAND_REG_IMM_FP64
Definition: SIDefines.h:205
@ OPERAND_REG_INLINE_C_V2FP16
Definition: SIDefines.h:225
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition: SIDefines.h:236
@ OPERAND_REG_INLINE_AC_FP32
Definition: SIDefines.h:237
@ OPERAND_REG_IMM_V2INT32
Definition: SIDefines.h:212
@ OPERAND_REG_IMM_FP32
Definition: SIDefines.h:204
@ OPERAND_INPUT_MODS
Definition: SIDefines.h:241
@ OPERAND_REG_INLINE_C_FP32
Definition: SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT32
Definition: SIDefines.h:217
@ OPERAND_REG_INLINE_C_V2INT16
Definition: SIDefines.h:223
@ OPERAND_REG_IMM_V2FP32
Definition: SIDefines.h:213
@ OPERAND_REG_INLINE_AC_FP64
Definition: SIDefines.h:238
@ OPERAND_REG_INLINE_C_FP16
Definition: SIDefines.h:220
@ OPERAND_REG_IMM_INT16
Definition: SIDefines.h:203
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition: SIDefines.h:228
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition: ELF.h:1422
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:228
@ OPERAND_IMMEDIATE
Definition: MCInstrDesc.h:61
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Reg
All possible values of the reg field in the ModR/M byte.
Format
The format used for serializing/deserializing remarks.
Definition: RemarkFormat.h:26
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition: Error.h:1113
@ Offset
Definition: DWP.cpp:477
@ Length
Definition: DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
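The range-based all_of wrapper keeps operand-wide validation checks compact; for example:

llvm::SmallVector<int, 4> Widths = {32, 32, 64};
bool AllDwordMultiples =
    llvm::all_of(Widths, [](int W) { return W % 32 == 0; }); // true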
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:307
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition: Alignment.h:217
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
void PrintError(const Twine &Msg)
Definition: Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:252
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:293
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:295
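These integer helpers are the usual vocabulary for range and power-of-two checks; for example:

bool FitsU16 = llvm::isUIntN(16, 65535);  // true
bool FitsU16b = llvm::isUIntN(16, 65536); // false: 65536 needs 17 bits
bool Pow2 = llvm::isPowerOf2_64(4096);    // true
unsigned Up = llvm::bit_ceil(100u);       // 128, smallest power of two >= 100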
Target & getTheR600Target()
The target for R600 GPUs.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
Definition: SPIRVUtils.cpp:976
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:159
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition: MathExtras.h:198
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:164
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:399
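Splitting 64-bit values and rounding up divisions come up constantly when sizing register groups; for instance:

uint64_t V = 0x123456789ULL;
uint32_t HiHalf = llvm::Hi_32(V);           // 0x1
uint32_t LoHalf = llvm::Lo_32(V);           // 0x23456789
uint64_t Groups = llvm::divideCeil(70, 32); // 3: ceil(70 / 32)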
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
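alignTo rounds a byte count up to a power-of-two alignment; a short sketch:

uint64_t Padded = llvm::alignTo(10, llvm::Align(4)); // 12
uint64_t Exact = llvm::alignTo(16, llvm::Align(8));  // 16, already aligned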
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:312
unsigned M0(unsigned Val)
Definition: VE.h:376
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1777
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:257
const char * toString(DWARFSectionKind Kind)
#define N
RegisterKind Kind
StringLiteral Name
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
Definition: TargetParser.h:132
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
Represents the counter values to wait for in an s_waitcnt instruction.
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:266
static LLVM_ABI const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:264
static LLVM_ABI const fltSemantics & BFloat() LLVM_READNONE
Definition: APFloat.cpp:265
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:320
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:217
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...