1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
56enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
62class AMDGPUOperand : public MCParsedAsmOperand {
63 enum KindTy {
64 Token,
65 Immediate,
66 Register,
67 Expression
68 } Kind;
69
70 SMLoc StartLoc, EndLoc;
71 const AMDGPUAsmParser *AsmParser;
72
73public:
74 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
75 : Kind(Kind_), AsmParser(AsmParser_) {}
76
77 using Ptr = std::unique_ptr<AMDGPUOperand>;
78
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 LitModifier Lit = LitModifier::None;
84
85 bool hasFPModifiers() const { return Abs || Neg; }
86 bool hasIntModifiers() const { return Sext; }
87 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
88
89 int64_t getFPModifiersOperand() const {
90 int64_t Operand = 0;
91 Operand |= Abs ? SISrcMods::ABS : 0u;
92 Operand |= Neg ? SISrcMods::NEG : 0u;
93 return Operand;
94 }
95
96 int64_t getIntModifiersOperand() const {
97 int64_t Operand = 0;
98 Operand |= Sext ? SISrcMods::SEXT : 0u;
99 return Operand;
100 }
101
102 int64_t getModifiersOperand() const {
103 assert(!(hasFPModifiers() && hasIntModifiers())
104 && "fp and int modifiers should not be used simultaneously");
105 if (hasFPModifiers())
106 return getFPModifiersOperand();
107 if (hasIntModifiers())
108 return getIntModifiersOperand();
109 return 0;
110 }
111
112 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
113 };
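  // A minimal sketch of how these flags combine (not from the original
  // source): a VOP3 source written as "-|v1|" parses with both Abs and Neg
  // set, and the encoded value feeds the *_modifiers immediate that precedes
  // the register operand:
  //
  //   AMDGPUOperand::Modifiers Mods;
  //   Mods.Abs = true;
  //   Mods.Neg = true;
  //   int64_t Enc = Mods.getModifiersOperand(); // SISrcMods::ABS | SISrcMods::NEG
  //
  // Setting Sext together with Abs/Neg would trip the assertion in
  // getModifiersOperand(), since sext() is an integer modifier while -| |
  // are floating-point modifiers.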
114
115 enum ImmTy {
116 ImmTyNone,
117 ImmTyGDS,
118 ImmTyLDS,
119 ImmTyOffen,
120 ImmTyIdxen,
121 ImmTyAddr64,
122 ImmTyOffset,
123 ImmTyInstOffset,
124 ImmTyOffset0,
125 ImmTyOffset1,
126 ImmTySMEMOffsetMod,
127 ImmTyCPol,
128 ImmTyTFE,
129 ImmTyD16,
130 ImmTyClamp,
131 ImmTyOModSI,
132 ImmTySDWADstSel,
133 ImmTySDWASrc0Sel,
134 ImmTySDWASrc1Sel,
135 ImmTySDWADstUnused,
136 ImmTyDMask,
137 ImmTyDim,
138 ImmTyUNorm,
139 ImmTyDA,
140 ImmTyR128A16,
141 ImmTyA16,
142 ImmTyLWE,
143 ImmTyExpTgt,
144 ImmTyExpCompr,
145 ImmTyExpVM,
146 ImmTyFORMAT,
147 ImmTyHwreg,
148 ImmTyOff,
149 ImmTySendMsg,
150 ImmTyInterpSlot,
151 ImmTyInterpAttr,
152 ImmTyInterpAttrChan,
153 ImmTyOpSel,
154 ImmTyOpSelHi,
155 ImmTyNegLo,
156 ImmTyNegHi,
157 ImmTyIndexKey8bit,
158 ImmTyIndexKey16bit,
159 ImmTyIndexKey32bit,
160 ImmTyDPP8,
161 ImmTyDppCtrl,
162 ImmTyDppRowMask,
163 ImmTyDppBankMask,
164 ImmTyDppBoundCtrl,
165 ImmTyDppFI,
166 ImmTySwizzle,
167 ImmTyGprIdxMode,
168 ImmTyHigh,
169 ImmTyBLGP,
170 ImmTyCBSZ,
171 ImmTyABID,
172 ImmTyEndpgm,
173 ImmTyWaitVDST,
174 ImmTyWaitEXP,
175 ImmTyWaitVAVDst,
176 ImmTyWaitVMVSrc,
177 ImmTyBitOp3,
178 ImmTyMatrixAFMT,
179 ImmTyMatrixBFMT,
180 ImmTyMatrixAScale,
181 ImmTyMatrixBScale,
182 ImmTyMatrixAScaleFmt,
183 ImmTyMatrixBScaleFmt,
184 ImmTyMatrixAReuse,
185 ImmTyMatrixBReuse,
186 ImmTyScaleSel,
187 ImmTyByteSel,
188 };
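  // For orientation, a rough (illustrative, non-exhaustive) mapping from
  // assembler syntax to the ImmTy recorded on the operand: "offset:4095" ->
  // ImmTyOffset, "gds" -> ImmTyGDS, "dmask:0xf" -> ImmTyDMask, "clamp" ->
  // ImmTyClamp, "row_mask:0xf" -> ImmTyDppRowMask, "sendmsg(...)" ->
  // ImmTySendMsg.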
189
190private:
191 struct TokOp {
192 const char *Data;
193 unsigned Length;
194 };
195
196 struct ImmOp {
197 int64_t Val;
198 ImmTy Type;
199 bool IsFPImm;
200 Modifiers Mods;
201 };
202
203 struct RegOp {
204 MCRegister RegNo;
205 Modifiers Mods;
206 };
207
208 union {
209 TokOp Tok;
210 ImmOp Imm;
211 RegOp Reg;
212 const MCExpr *Expr;
213 };
214
215 // The index of the associated MCInst operand.
216 mutable int MCOpIdx = -1;
217
218public:
219 bool isToken() const override { return Kind == Token; }
220
221 bool isSymbolRefExpr() const {
222 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
223 }
224
225 bool isImm() const override {
226 return Kind == Immediate;
227 }
228
229 bool isInlinableImm(MVT type) const;
230 bool isLiteralImm(MVT type) const;
231
232 bool isRegKind() const {
233 return Kind == Register;
234 }
235
236 bool isReg() const override {
237 return isRegKind() && !hasModifiers();
238 }
239
240 bool isRegOrInline(unsigned RCID, MVT type) const {
241 return isRegClass(RCID) || isInlinableImm(type);
242 }
243
244 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
245 return isRegOrInline(RCID, type) || isLiteralImm(type);
246 }
247
248 bool isRegOrImmWithInt16InputMods() const {
249 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
250 }
251
 252 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
 253 return isRegOrImmWithInputMods(
 254 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
 255 }
256
257 bool isRegOrImmWithInt32InputMods() const {
258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259 }
260
261 bool isRegOrInlineImmWithInt16InputMods() const {
262 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
263 }
264
265 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
266 return isRegOrInline(
267 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
268 }
269
270 bool isRegOrInlineImmWithInt32InputMods() const {
271 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
272 }
273
274 bool isRegOrImmWithInt64InputMods() const {
275 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
276 }
277
278 bool isRegOrImmWithFP16InputMods() const {
279 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
280 }
281
 282 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
 283 return isRegOrImmWithInputMods(
 284 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
 285 }
286
287 bool isRegOrImmWithFP32InputMods() const {
288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
289 }
290
291 bool isRegOrImmWithFP64InputMods() const {
292 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
293 }
294
295 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
296 return isRegOrInline(
297 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
298 }
299
300 bool isRegOrInlineImmWithFP32InputMods() const {
301 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
302 }
303
304 bool isRegOrInlineImmWithFP64InputMods() const {
305 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
306 }
307
308 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
309
310 bool isVRegWithFP32InputMods() const {
311 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
312 }
313
314 bool isVRegWithFP64InputMods() const {
315 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
316 }
317
318 bool isPackedFP16InputMods() const {
319 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
320 }
321
322 bool isPackedVGPRFP32InputMods() const {
323 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
324 }
325
326 bool isVReg() const {
327 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
328 isRegClass(AMDGPU::VReg_64RegClassID) ||
329 isRegClass(AMDGPU::VReg_96RegClassID) ||
330 isRegClass(AMDGPU::VReg_128RegClassID) ||
331 isRegClass(AMDGPU::VReg_160RegClassID) ||
332 isRegClass(AMDGPU::VReg_192RegClassID) ||
333 isRegClass(AMDGPU::VReg_256RegClassID) ||
334 isRegClass(AMDGPU::VReg_512RegClassID) ||
335 isRegClass(AMDGPU::VReg_1024RegClassID);
336 }
337
338 bool isVReg32() const {
339 return isRegClass(AMDGPU::VGPR_32RegClassID);
340 }
341
342 bool isVReg32OrOff() const {
343 return isOff() || isVReg32();
344 }
345
346 bool isNull() const {
347 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
348 }
349
350 bool isVRegWithInputMods() const;
351 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
352 template <bool IsFake16> bool isT16VRegWithInputMods() const;
353
354 bool isSDWAOperand(MVT type) const;
355 bool isSDWAFP16Operand() const;
356 bool isSDWAFP32Operand() const;
357 bool isSDWAInt16Operand() const;
358 bool isSDWAInt32Operand() const;
359
360 bool isImmTy(ImmTy ImmT) const {
361 return isImm() && Imm.Type == ImmT;
362 }
363
364 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
365
366 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
367
368 bool isImmModifier() const {
369 return isImm() && Imm.Type != ImmTyNone;
370 }
371
372 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
373 bool isDim() const { return isImmTy(ImmTyDim); }
374 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
375 bool isOff() const { return isImmTy(ImmTyOff); }
376 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
377 bool isOffen() const { return isImmTy(ImmTyOffen); }
378 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
379 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
380 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
381 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
382 bool isGDS() const { return isImmTy(ImmTyGDS); }
383 bool isLDS() const { return isImmTy(ImmTyLDS); }
384 bool isCPol() const { return isImmTy(ImmTyCPol); }
385 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
386 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
387 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
388 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
389 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
390 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
391 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
392 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
393 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
394 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
395 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
396 bool isTFE() const { return isImmTy(ImmTyTFE); }
397 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
398 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
399 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
400 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
401 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
402 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
403 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
404 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
405 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
406 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
407 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
408 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
409 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
410 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
411
412 bool isRegOrImm() const {
413 return isReg() || isImm();
414 }
415
416 bool isRegClass(unsigned RCID) const;
417
418 bool isInlineValue() const;
419
420 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
421 return isRegOrInline(RCID, type) && !hasModifiers();
422 }
423
424 bool isSCSrcB16() const {
425 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
426 }
427
428 bool isSCSrcV2B16() const {
429 return isSCSrcB16();
430 }
431
432 bool isSCSrc_b32() const {
433 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
434 }
435
436 bool isSCSrc_b64() const {
437 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
438 }
439
440 bool isBoolReg() const;
441
442 bool isSCSrcF16() const {
443 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
444 }
445
446 bool isSCSrcV2F16() const {
447 return isSCSrcF16();
448 }
449
450 bool isSCSrcF32() const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
452 }
453
454 bool isSCSrcF64() const {
455 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
456 }
457
458 bool isSSrc_b32() const {
459 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
460 }
461
462 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
463
464 bool isSSrcV2B16() const {
465 llvm_unreachable("cannot happen");
466 return isSSrc_b16();
467 }
468
469 bool isSSrc_b64() const {
470 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
471 // See isVSrc64().
472 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
473 (((const MCTargetAsmParser *)AsmParser)
474 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
475 isExpr());
476 }
477
478 bool isSSrc_f32() const {
479 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
480 }
481
482 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
483
484 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
485
486 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
487
488 bool isSSrcV2F16() const {
489 llvm_unreachable("cannot happen");
490 return isSSrc_f16();
491 }
492
493 bool isSSrcV2FP32() const {
494 llvm_unreachable("cannot happen");
495 return isSSrc_f32();
496 }
497
498 bool isSCSrcV2FP32() const {
499 llvm_unreachable("cannot happen");
500 return isSCSrcF32();
501 }
502
503 bool isSSrcV2INT32() const {
504 llvm_unreachable("cannot happen");
505 return isSSrc_b32();
506 }
507
508 bool isSCSrcV2INT32() const {
509 llvm_unreachable("cannot happen");
510 return isSCSrc_b32();
511 }
512
513 bool isSSrcOrLds_b32() const {
514 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
515 isLiteralImm(MVT::i32) || isExpr();
516 }
517
518 bool isVCSrc_b32() const {
519 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
520 }
521
522 bool isVCSrc_b32_Lo256() const {
523 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
524 }
525
526 bool isVCSrc_b64_Lo256() const {
527 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
528 }
529
530 bool isVCSrc_b64() const {
531 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
532 }
533
534 bool isVCSrcT_b16() const {
535 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
536 }
537
538 bool isVCSrcTB16_Lo128() const {
539 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
540 }
541
542 bool isVCSrcFake16B16_Lo128() const {
543 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
544 }
545
546 bool isVCSrc_b16() const {
547 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
548 }
549
550 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
551
552 bool isVCSrc_f32() const {
553 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
554 }
555
556 bool isVCSrc_f64() const {
557 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
558 }
559
560 bool isVCSrcTBF16() const {
561 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
562 }
563
564 bool isVCSrcT_f16() const {
565 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
566 }
567
568 bool isVCSrcT_bf16() const {
569 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
570 }
571
572 bool isVCSrcTBF16_Lo128() const {
573 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
574 }
575
576 bool isVCSrcTF16_Lo128() const {
577 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
578 }
579
580 bool isVCSrcFake16BF16_Lo128() const {
581 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
582 }
583
584 bool isVCSrcFake16F16_Lo128() const {
585 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
586 }
587
588 bool isVCSrc_bf16() const {
589 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
590 }
591
592 bool isVCSrc_f16() const {
593 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
594 }
595
596 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
597
598 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
599
600 bool isVSrc_b32() const {
601 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
602 }
603
604 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
605
606 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
607
608 bool isVSrcT_b16_Lo128() const {
609 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
610 }
611
612 bool isVSrcFake16_b16_Lo128() const {
613 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
614 }
615
616 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
617
618 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
619
620 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
621
622 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
623
624 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
625
626 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
627
628 bool isVSrc_f32() const {
629 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
630 }
631
632 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
633
634 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
635
636 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
637
638 bool isVSrcT_bf16_Lo128() const {
639 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
640 }
641
642 bool isVSrcT_f16_Lo128() const {
643 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
644 }
645
646 bool isVSrcFake16_bf16_Lo128() const {
647 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
648 }
649
650 bool isVSrcFake16_f16_Lo128() const {
651 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
652 }
653
654 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
655
656 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
657
658 bool isVSrc_v2bf16() const {
659 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
660 }
661
662 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
663
664 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
665
666 bool isVISrcB32() const {
667 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
668 }
669
670 bool isVISrcB16() const {
671 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
672 }
673
674 bool isVISrcV2B16() const {
675 return isVISrcB16();
676 }
677
678 bool isVISrcF32() const {
679 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
680 }
681
682 bool isVISrcF16() const {
683 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
684 }
685
686 bool isVISrcV2F16() const {
687 return isVISrcF16() || isVISrcB32();
688 }
689
690 bool isVISrc_64_bf16() const {
691 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
692 }
693
694 bool isVISrc_64_f16() const {
695 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
696 }
697
698 bool isVISrc_64_b32() const {
699 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
700 }
701
702 bool isVISrc_64B64() const {
703 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
704 }
705
706 bool isVISrc_64_f64() const {
707 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
708 }
709
710 bool isVISrc_64V2FP32() const {
711 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
712 }
713
714 bool isVISrc_64V2INT32() const {
715 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
716 }
717
718 bool isVISrc_256_b32() const {
719 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
720 }
721
722 bool isVISrc_256_f32() const {
723 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
724 }
725
726 bool isVISrc_256B64() const {
727 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
728 }
729
730 bool isVISrc_256_f64() const {
731 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
732 }
733
734 bool isVISrc_512_f64() const {
735 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
736 }
737
738 bool isVISrc_128B16() const {
739 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
740 }
741
742 bool isVISrc_128V2B16() const {
743 return isVISrc_128B16();
744 }
745
746 bool isVISrc_128_b32() const {
747 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
748 }
749
750 bool isVISrc_128_f32() const {
751 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
752 }
753
754 bool isVISrc_256V2FP32() const {
755 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
756 }
757
758 bool isVISrc_256V2INT32() const {
759 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
760 }
761
762 bool isVISrc_512_b32() const {
763 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
764 }
765
766 bool isVISrc_512B16() const {
767 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
768 }
769
770 bool isVISrc_512V2B16() const {
771 return isVISrc_512B16();
772 }
773
774 bool isVISrc_512_f32() const {
775 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
776 }
777
778 bool isVISrc_512F16() const {
779 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
780 }
781
782 bool isVISrc_512V2F16() const {
783 return isVISrc_512F16() || isVISrc_512_b32();
784 }
785
786 bool isVISrc_1024_b32() const {
787 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
788 }
789
790 bool isVISrc_1024B16() const {
791 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
792 }
793
794 bool isVISrc_1024V2B16() const {
795 return isVISrc_1024B16();
796 }
797
798 bool isVISrc_1024_f32() const {
799 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
800 }
801
802 bool isVISrc_1024F16() const {
803 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
804 }
805
806 bool isVISrc_1024V2F16() const {
807 return isVISrc_1024F16() || isVISrc_1024_b32();
808 }
809
810 bool isAISrcB32() const {
811 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
812 }
813
814 bool isAISrcB16() const {
815 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
816 }
817
818 bool isAISrcV2B16() const {
819 return isAISrcB16();
820 }
821
822 bool isAISrcF32() const {
823 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
824 }
825
826 bool isAISrcF16() const {
827 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
828 }
829
830 bool isAISrcV2F16() const {
831 return isAISrcF16() || isAISrcB32();
832 }
833
834 bool isAISrc_64B64() const {
835 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
836 }
837
838 bool isAISrc_64_f64() const {
839 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
840 }
841
842 bool isAISrc_128_b32() const {
843 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
844 }
845
846 bool isAISrc_128B16() const {
847 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
848 }
849
850 bool isAISrc_128V2B16() const {
851 return isAISrc_128B16();
852 }
853
854 bool isAISrc_128_f32() const {
855 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
856 }
857
858 bool isAISrc_128F16() const {
859 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
860 }
861
862 bool isAISrc_128V2F16() const {
863 return isAISrc_128F16() || isAISrc_128_b32();
864 }
865
866 bool isVISrc_128_bf16() const {
867 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
868 }
869
870 bool isVISrc_128_f16() const {
871 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
872 }
873
874 bool isVISrc_128V2F16() const {
875 return isVISrc_128_f16() || isVISrc_128_b32();
876 }
877
878 bool isAISrc_256B64() const {
879 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
880 }
881
882 bool isAISrc_256_f64() const {
883 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
884 }
885
886 bool isAISrc_512_b32() const {
887 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
888 }
889
890 bool isAISrc_512B16() const {
891 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
892 }
893
894 bool isAISrc_512V2B16() const {
895 return isAISrc_512B16();
896 }
897
898 bool isAISrc_512_f32() const {
899 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
900 }
901
902 bool isAISrc_512F16() const {
903 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
904 }
905
906 bool isAISrc_512V2F16() const {
907 return isAISrc_512F16() || isAISrc_512_b32();
908 }
909
910 bool isAISrc_1024_b32() const {
911 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
912 }
913
914 bool isAISrc_1024B16() const {
915 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
916 }
917
918 bool isAISrc_1024V2B16() const {
919 return isAISrc_1024B16();
920 }
921
922 bool isAISrc_1024_f32() const {
923 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
924 }
925
926 bool isAISrc_1024F16() const {
927 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
928 }
929
930 bool isAISrc_1024V2F16() const {
931 return isAISrc_1024F16() || isAISrc_1024_b32();
932 }
933
934 bool isKImmFP32() const {
935 return isLiteralImm(MVT::f32);
936 }
937
938 bool isKImmFP16() const {
939 return isLiteralImm(MVT::f16);
940 }
941
942 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
943
944 bool isMem() const override {
945 return false;
946 }
947
948 bool isExpr() const {
949 return Kind == Expression;
950 }
951
952 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
953
954 bool isSWaitCnt() const;
955 bool isDepCtr() const;
956 bool isSDelayALU() const;
957 bool isHwreg() const;
958 bool isSendMsg() const;
959 bool isSplitBarrier() const;
960 bool isSwizzle() const;
961 bool isSMRDOffset8() const;
962 bool isSMEMOffset() const;
963 bool isSMRDLiteralOffset() const;
964 bool isDPP8() const;
965 bool isDPPCtrl() const;
966 bool isBLGP() const;
967 bool isGPRIdxMode() const;
968 bool isS16Imm() const;
969 bool isU16Imm() const;
970 bool isEndpgm() const;
971
972 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
973 return [this, P]() { return P(*this); };
974 }
975
976 StringRef getToken() const {
977 assert(isToken());
978 return StringRef(Tok.Data, Tok.Length);
979 }
980
981 int64_t getImm() const {
982 assert(isImm());
983 return Imm.Val;
984 }
985
986 void setImm(int64_t Val) {
987 assert(isImm());
988 Imm.Val = Val;
989 }
990
991 ImmTy getImmTy() const {
992 assert(isImm());
993 return Imm.Type;
994 }
995
996 MCRegister getReg() const override {
997 assert(isRegKind());
998 return Reg.RegNo;
999 }
1000
1001 SMLoc getStartLoc() const override {
1002 return StartLoc;
1003 }
1004
1005 SMLoc getEndLoc() const override {
1006 return EndLoc;
1007 }
1008
1009 SMRange getLocRange() const {
1010 return SMRange(StartLoc, EndLoc);
1011 }
1012
1013 int getMCOpIdx() const { return MCOpIdx; }
1014
1015 Modifiers getModifiers() const {
1016 assert(isRegKind() || isImmTy(ImmTyNone));
1017 return isRegKind() ? Reg.Mods : Imm.Mods;
1018 }
1019
1020 void setModifiers(Modifiers Mods) {
1021 assert(isRegKind() || isImmTy(ImmTyNone));
1022 if (isRegKind())
1023 Reg.Mods = Mods;
1024 else
1025 Imm.Mods = Mods;
1026 }
1027
1028 bool hasModifiers() const {
1029 return getModifiers().hasModifiers();
1030 }
1031
1032 bool hasFPModifiers() const {
1033 return getModifiers().hasFPModifiers();
1034 }
1035
1036 bool hasIntModifiers() const {
1037 return getModifiers().hasIntModifiers();
1038 }
1039
1040 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1041
1042 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1043
1044 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1045
1046 void addRegOperands(MCInst &Inst, unsigned N) const;
1047
1048 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1049 if (isRegKind())
1050 addRegOperands(Inst, N);
1051 else
1052 addImmOperands(Inst, N);
1053 }
1054
1055 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1056 Modifiers Mods = getModifiers();
1057 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1058 if (isRegKind()) {
1059 addRegOperands(Inst, N);
1060 } else {
1061 addImmOperands(Inst, N, false);
1062 }
1063 }
1064
1065 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1066 assert(!hasIntModifiers());
1067 addRegOrImmWithInputModsOperands(Inst, N);
1068 }
1069
1070 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1071 assert(!hasFPModifiers());
1072 addRegOrImmWithInputModsOperands(Inst, N);
1073 }
1074
1075 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1076 Modifiers Mods = getModifiers();
1077 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1078 assert(isRegKind());
1079 addRegOperands(Inst, N);
1080 }
1081
1082 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1083 assert(!hasIntModifiers());
1084 addRegWithInputModsOperands(Inst, N);
1085 }
1086
1087 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1088 assert(!hasFPModifiers());
1089 addRegWithInputModsOperands(Inst, N);
1090 }
1091
1092 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1093 // clang-format off
1094 switch (Type) {
1095 case ImmTyNone: OS << "None"; break;
1096 case ImmTyGDS: OS << "GDS"; break;
1097 case ImmTyLDS: OS << "LDS"; break;
1098 case ImmTyOffen: OS << "Offen"; break;
1099 case ImmTyIdxen: OS << "Idxen"; break;
1100 case ImmTyAddr64: OS << "Addr64"; break;
1101 case ImmTyOffset: OS << "Offset"; break;
1102 case ImmTyInstOffset: OS << "InstOffset"; break;
1103 case ImmTyOffset0: OS << "Offset0"; break;
1104 case ImmTyOffset1: OS << "Offset1"; break;
1105 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1106 case ImmTyCPol: OS << "CPol"; break;
1107 case ImmTyIndexKey8bit: OS << "index_key"; break;
1108 case ImmTyIndexKey16bit: OS << "index_key"; break;
1109 case ImmTyIndexKey32bit: OS << "index_key"; break;
1110 case ImmTyTFE: OS << "TFE"; break;
1111 case ImmTyD16: OS << "D16"; break;
1112 case ImmTyFORMAT: OS << "FORMAT"; break;
1113 case ImmTyClamp: OS << "Clamp"; break;
1114 case ImmTyOModSI: OS << "OModSI"; break;
1115 case ImmTyDPP8: OS << "DPP8"; break;
1116 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1117 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1118 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1119 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1120 case ImmTyDppFI: OS << "DppFI"; break;
1121 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1122 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1123 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1124 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1125 case ImmTyDMask: OS << "DMask"; break;
1126 case ImmTyDim: OS << "Dim"; break;
1127 case ImmTyUNorm: OS << "UNorm"; break;
1128 case ImmTyDA: OS << "DA"; break;
1129 case ImmTyR128A16: OS << "R128A16"; break;
1130 case ImmTyA16: OS << "A16"; break;
1131 case ImmTyLWE: OS << "LWE"; break;
1132 case ImmTyOff: OS << "Off"; break;
1133 case ImmTyExpTgt: OS << "ExpTgt"; break;
1134 case ImmTyExpCompr: OS << "ExpCompr"; break;
1135 case ImmTyExpVM: OS << "ExpVM"; break;
1136 case ImmTyHwreg: OS << "Hwreg"; break;
1137 case ImmTySendMsg: OS << "SendMsg"; break;
1138 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1139 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1140 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1141 case ImmTyOpSel: OS << "OpSel"; break;
1142 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1143 case ImmTyNegLo: OS << "NegLo"; break;
1144 case ImmTyNegHi: OS << "NegHi"; break;
1145 case ImmTySwizzle: OS << "Swizzle"; break;
1146 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1147 case ImmTyHigh: OS << "High"; break;
1148 case ImmTyBLGP: OS << "BLGP"; break;
1149 case ImmTyCBSZ: OS << "CBSZ"; break;
1150 case ImmTyABID: OS << "ABID"; break;
1151 case ImmTyEndpgm: OS << "Endpgm"; break;
1152 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1153 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1154 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1155 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1156 case ImmTyBitOp3: OS << "BitOp3"; break;
1157 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1158 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1159 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1160 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1161 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1162 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1163 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1164 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1165 case ImmTyScaleSel: OS << "ScaleSel" ; break;
1166 case ImmTyByteSel: OS << "ByteSel" ; break;
1167 }
1168 // clang-format on
1169 }
1170
1171 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1172 switch (Kind) {
1173 case Register:
1174 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1175 << " mods: " << Reg.Mods << '>';
1176 break;
1177 case Immediate:
1178 OS << '<' << getImm();
1179 if (getImmTy() != ImmTyNone) {
1180 OS << " type: "; printImmTy(OS, getImmTy());
1181 }
1182 OS << " mods: " << Imm.Mods << '>';
1183 break;
1184 case Token:
1185 OS << '\'' << getToken() << '\'';
1186 break;
1187 case Expression:
1188 OS << "<expr ";
1189 MAI.printExpr(OS, *Expr);
1190 OS << '>';
1191 break;
1192 }
1193 }
1194
1195 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1196 int64_t Val, SMLoc Loc,
1197 ImmTy Type = ImmTyNone,
1198 bool IsFPImm = false) {
1199 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1200 Op->Imm.Val = Val;
1201 Op->Imm.IsFPImm = IsFPImm;
1202 Op->Imm.Type = Type;
1203 Op->Imm.Mods = Modifiers();
1204 Op->StartLoc = Loc;
1205 Op->EndLoc = Loc;
1206 return Op;
1207 }
1208
1209 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1210 StringRef Str, SMLoc Loc,
1211 bool HasExplicitEncodingSize = true) {
1212 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1213 Res->Tok.Data = Str.data();
1214 Res->Tok.Length = Str.size();
1215 Res->StartLoc = Loc;
1216 Res->EndLoc = Loc;
1217 return Res;
1218 }
1219
1220 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1221 MCRegister Reg, SMLoc S, SMLoc E) {
1222 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1223 Op->Reg.RegNo = Reg;
1224 Op->Reg.Mods = Modifiers();
1225 Op->StartLoc = S;
1226 Op->EndLoc = E;
1227 return Op;
1228 }
1229
1230 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1231 const class MCExpr *Expr, SMLoc S) {
1232 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1233 Op->Expr = Expr;
1234 Op->StartLoc = S;
1235 Op->EndLoc = S;
1236 return Op;
1237 }
1238};
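// Typical use of the factory helpers above when building the operand list,
// shown as a minimal sketch (the surrounding parser code is assumed, not
// quoted from this file):
//
//   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S,
//                                               AMDGPUOperand::ImmTyOffset));
//   Operands.push_back(AMDGPUOperand::CreateReg(this, Reg, S, E));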
1239
1240raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1241 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1242 return OS;
1243}
1244
1245//===----------------------------------------------------------------------===//
1246// AsmParser
1247//===----------------------------------------------------------------------===//
1248
1249// TODO: define GET_SUBTARGET_FEATURE_NAME
1250#define GET_REGISTER_MATCHER
1251#include "AMDGPUGenAsmMatcher.inc"
1252#undef GET_REGISTER_MATCHER
1253#undef GET_SUBTARGET_FEATURE_NAME
1254
1255// Holds info related to the current kernel, e.g. count of SGPRs used.
1256// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1257// .amdgpu_hsa_kernel or at EOF.
1258class KernelScopeInfo {
1259 int SgprIndexUnusedMin = -1;
1260 int VgprIndexUnusedMin = -1;
1261 int AgprIndexUnusedMin = -1;
1262 MCContext *Ctx = nullptr;
1263 MCSubtargetInfo const *MSTI = nullptr;
1264
1265 void usesSgprAt(int i) {
1266 if (i >= SgprIndexUnusedMin) {
1267 SgprIndexUnusedMin = ++i;
1268 if (Ctx) {
1269 MCSymbol* const Sym =
1270 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1271 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1272 }
1273 }
1274 }
1275
1276 void usesVgprAt(int i) {
1277 if (i >= VgprIndexUnusedMin) {
1278 VgprIndexUnusedMin = ++i;
1279 if (Ctx) {
1280 MCSymbol* const Sym =
1281 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1282 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1283 VgprIndexUnusedMin);
1284 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1285 }
1286 }
1287 }
1288
1289 void usesAgprAt(int i) {
1290 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1291 if (!hasMAIInsts(*MSTI))
1292 return;
1293
1294 if (i >= AgprIndexUnusedMin) {
1295 AgprIndexUnusedMin = ++i;
1296 if (Ctx) {
1297 MCSymbol* const Sym =
1298 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1299 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1300
1301 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1302 MCSymbol* const vSym =
1303 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1304 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1305 VgprIndexUnusedMin);
1306 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1307 }
1308 }
1309 }
1310
1311public:
1312 KernelScopeInfo() = default;
1313
1314 void initialize(MCContext &Context) {
1315 Ctx = &Context;
1316 MSTI = Ctx->getSubtargetInfo();
1317
1318 usesSgprAt(SgprIndexUnusedMin = -1);
1319 usesVgprAt(VgprIndexUnusedMin = -1);
1320 if (hasMAIInsts(*MSTI)) {
1321 usesAgprAt(AgprIndexUnusedMin = -1);
1322 }
1323 }
1324
1325 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1326 unsigned RegWidth) {
1327 switch (RegKind) {
1328 case IS_SGPR:
1329 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1330 break;
1331 case IS_AGPR:
1332 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1333 break;
1334 case IS_VGPR:
1335 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1336 break;
1337 default:
1338 break;
1339 }
1340 }
1341};
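// Worked example (illustrative, not from the original source): a reference to
// v[6:7] reaches usesRegister(IS_VGPR, 6, 64), which calls
// usesVgprAt(6 + divideCeil(64, 32) - 1) == usesVgprAt(7) and bumps
// .kernel.vgpr_count to at least 8; s[2:3] likewise raises
// .kernel.sgpr_count to at least 4.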
1342
1343class AMDGPUAsmParser : public MCTargetAsmParser {
1344 MCAsmParser &Parser;
1345
1346 unsigned ForcedEncodingSize = 0;
1347 bool ForcedDPP = false;
1348 bool ForcedSDWA = false;
1349 KernelScopeInfo KernelScope;
1350 const unsigned HwMode;
1351
1352 /// @name Auto-generated Match Functions
1353 /// {
1354
1355#define GET_ASSEMBLER_HEADER
1356#include "AMDGPUGenAsmMatcher.inc"
1357
1358 /// }
1359
1360 /// Get size of register operand
1361 unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
1362 assert(OpNo < Desc.NumOperands);
1363 int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
1364 return getRegBitWidth(RCID) / 8;
1365 }
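  // For example (illustrative): an operand constrained to VReg_64 has
  // getRegBitWidth(AMDGPU::VReg_64RegClassID) == 64, so this returns 8 bytes.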
1366
1367private:
1368 void createConstantSymbol(StringRef Id, int64_t Val);
1369
1370 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1371 bool OutOfRangeError(SMRange Range);
1372 /// Calculate VGPR/SGPR blocks required for given target, reserved
1373 /// registers, and user-specified NextFreeXGPR values.
1374 ///
1375 /// \param Features [in] Target features, used for bug corrections.
1376 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1377 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1378 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1379 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1380 /// descriptor field, if valid.
1381 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1382 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1383 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1384 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1385 /// \param VGPRBlocks [out] Result VGPR block count.
1386 /// \param SGPRBlocks [out] Result SGPR block count.
1387 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1388 const MCExpr *FlatScrUsed, bool XNACKUsed,
1389 std::optional<bool> EnableWavefrontSize32,
1390 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1391 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1392 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
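  // For orientation (an illustrative example, not normative for every
  // target): with a VGPR allocation granule of 4, NextFreeVGPR = 10 rounds
  // up to 12 and is reported as 2 in VGPRBlocks, since the granulated count
  // is encoded as "blocks - 1"; SGPRs are granulated the same way (typically
  // a granule of 8) after accounting for reserved VCC/FLAT_SCRATCH/XNACK.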
1393 bool ParseDirectiveAMDGCNTarget();
1394 bool ParseDirectiveAMDHSACodeObjectVersion();
1395 bool ParseDirectiveAMDHSAKernel();
1396 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1397 bool ParseDirectiveAMDKernelCodeT();
1398 // TODO: Possibly make subtargetHasRegister const.
1399 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1400 bool ParseDirectiveAMDGPUHsaKernel();
1401
1402 bool ParseDirectiveISAVersion();
1403 bool ParseDirectiveHSAMetadata();
1404 bool ParseDirectivePALMetadataBegin();
1405 bool ParseDirectivePALMetadata();
1406 bool ParseDirectiveAMDGPULDS();
1407
1408 /// Common code to parse out a block of text (typically YAML) between start and
1409 /// end directives.
1410 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1411 const char *AssemblerDirectiveEnd,
1412 std::string &CollectString);
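  // As an illustration (assumed typical input, not quoted from this file),
  // the HSA metadata block collected by this helper looks like:
  //
  //   .amdgpu_metadata
  //   amdhsa.version:
  //     - 1
  //     - 2
  //   .end_amdgpu_metadata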
1413
1414 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1415 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1416 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1417 unsigned &RegNum, unsigned &RegWidth,
1418 bool RestoreOnFailure = false);
1419 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1420 unsigned &RegNum, unsigned &RegWidth,
1421 SmallVectorImpl<AsmToken> &Tokens);
1422 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1423 unsigned &RegWidth,
1424 SmallVectorImpl<AsmToken> &Tokens);
1425 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1426 unsigned &RegWidth,
1427 SmallVectorImpl<AsmToken> &Tokens);
1428 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1429 unsigned &RegWidth,
1430 SmallVectorImpl<AsmToken> &Tokens);
1431 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1432 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1433 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1434
1435 bool isRegister();
1436 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1437 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1438 void initializeGprCountSymbol(RegisterKind RegKind);
1439 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1440 unsigned RegWidth);
1441 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1442 bool IsAtomic);
1443
1444public:
1445 enum OperandMode {
1446 OperandMode_Default,
1447 OperandMode_NSA,
1448 };
1449
1450 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1451
1452 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1453 const MCInstrInfo &MII, const MCTargetOptions &Options)
1454 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser),
1455 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1457
1458 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1459
1460 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1461 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1462 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1463 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1464 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1465 } else {
1466 createConstantSymbol(".option.machine_version_major", ISA.Major);
1467 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1468 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1469 }
1470 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1471 initializeGprCountSymbol(IS_VGPR);
1472 initializeGprCountSymbol(IS_SGPR);
1473 } else
1474 KernelScope.initialize(getContext());
1475
1476 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1477 createConstantSymbol(Symbol, Code);
1478
1479 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1480 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1481 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1482 }
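  // The symbols created above are visible to the assembly being parsed, so
  // source can branch on them. A minimal sketch of the intended use
  // (illustrative; which names exist depends on the ABI selected above,
  // .amdgcn.* for HSA and .option.* otherwise):
  //
  //   .if .amdgcn.gfx_generation_number >= 10
  //     s_waitcnt_vscnt null, 0x0
  //   .endif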
1483
1484 bool hasMIMG_R128() const {
1485 return AMDGPU::hasMIMG_R128(getSTI());
1486 }
1487
1488 bool hasPackedD16() const {
1489 return AMDGPU::hasPackedD16(getSTI());
1490 }
1491
1492 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1493
1494 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1495
1496 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1497
1498 bool isSI() const {
1499 return AMDGPU::isSI(getSTI());
1500 }
1501
1502 bool isCI() const {
1503 return AMDGPU::isCI(getSTI());
1504 }
1505
1506 bool isVI() const {
1507 return AMDGPU::isVI(getSTI());
1508 }
1509
1510 bool isGFX9() const {
1511 return AMDGPU::isGFX9(getSTI());
1512 }
1513
 1514 // TODO: isGFX90A is also true for GFX940. This needs to be cleaned up.
1515 bool isGFX90A() const {
1516 return AMDGPU::isGFX90A(getSTI());
1517 }
1518
1519 bool isGFX940() const {
1520 return AMDGPU::isGFX940(getSTI());
1521 }
1522
1523 bool isGFX9Plus() const {
1524 return AMDGPU::isGFX9Plus(getSTI());
1525 }
1526
1527 bool isGFX10() const {
1528 return AMDGPU::isGFX10(getSTI());
1529 }
1530
1531 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1532
1533 bool isGFX11() const {
1534 return AMDGPU::isGFX11(getSTI());
1535 }
1536
1537 bool isGFX11Plus() const {
1538 return AMDGPU::isGFX11Plus(getSTI());
1539 }
1540
1541 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1542
1543 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1544
1545 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1546
1547 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1548
1549 bool isGFX10_BEncoding() const {
1550 return AMDGPU::isGFX10_BEncoding(getSTI());
1551 }
1552
1553 bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
1554
1555 bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
1556
1557 bool hasInv2PiInlineImm() const {
1558 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1559 }
1560
1561 bool has64BitLiterals() const {
1562 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1563 }
1564
1565 bool hasFlatOffsets() const {
1566 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1567 }
1568
1569 bool hasTrue16Insts() const {
1570 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1571 }
1572
1573 bool hasArchitectedFlatScratch() const {
1574 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1575 }
1576
1577 bool hasSGPR102_SGPR103() const {
1578 return !isVI() && !isGFX9();
1579 }
1580
1581 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1582
1583 bool hasIntClamp() const {
1584 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1585 }
1586
1587 bool hasPartialNSAEncoding() const {
1588 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1589 }
1590
1591 bool hasGloballyAddressableScratch() const {
1592 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1593 }
1594
1595 unsigned getNSAMaxSize(bool HasSampler = false) const {
1596 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1597 }
1598
1599 unsigned getMaxNumUserSGPRs() const {
1600 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1601 }
1602
1603 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1604
1605 AMDGPUTargetStreamer &getTargetStreamer() {
1606 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1607 return static_cast<AMDGPUTargetStreamer &>(TS);
1608 }
1609
1610 MCContext &getContext() const {
1611 // We need this const_cast because for some reason getContext() is not const
1612 // in MCAsmParser.
1613 return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
1614 }
1615
1616 const MCRegisterInfo *getMRI() const {
1617 return getContext().getRegisterInfo();
1618 }
1619
1620 const MCInstrInfo *getMII() const {
1621 return &MII;
1622 }
1623
 1624 // FIXME: This should not be used. Instead, use queries derived from
 1625 // getAvailableFeatures().
1626 const FeatureBitset &getFeatureBits() const {
1627 return getSTI().getFeatureBits();
1628 }
1629
1630 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1631 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1632 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1633
1634 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1635 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1636 bool isForcedDPP() const { return ForcedDPP; }
1637 bool isForcedSDWA() const { return ForcedSDWA; }
1638 ArrayRef<unsigned> getMatchedVariants() const;
1639 StringRef getMatchedVariantName() const;
1640
1641 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1642 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1643 bool RestoreOnFailure);
1644 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1645 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1646 SMLoc &EndLoc) override;
1647 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1648 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1649 unsigned Kind) override;
1650 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1651 OperandVector &Operands, MCStreamer &Out,
1652 uint64_t &ErrorInfo,
1653 bool MatchingInlineAsm) override;
1654 bool ParseDirective(AsmToken DirectiveID) override;
1655 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1656 OperandMode Mode = OperandMode_Default);
1657 StringRef parseMnemonicSuffix(StringRef Name);
1658 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1659 SMLoc NameLoc, OperandVector &Operands) override;
1660 //bool ProcessInstruction(MCInst &Inst);
1661
1662 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1663
1664 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1665
1666 ParseStatus
1667 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1668 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1669 std::function<bool(int64_t &)> ConvertResult = nullptr);
1670
1671 ParseStatus parseOperandArrayWithPrefix(
1672 const char *Prefix, OperandVector &Operands,
1673 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1674 bool (*ConvertResult)(int64_t &) = nullptr);
1675
1676 ParseStatus
1677 parseNamedBit(StringRef Name, OperandVector &Operands,
1678 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1679 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1680 ParseStatus parseCPol(OperandVector &Operands);
1681 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1682 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1683 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1684 SMLoc &StringLoc);
1685 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1686 StringRef Name,
1687 ArrayRef<const char *> Ids,
1688 int64_t &IntVal);
1689 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1690 StringRef Name,
1691 ArrayRef<const char *> Ids,
1692 AMDGPUOperand::ImmTy Type);
1693
1694 bool isModifier();
1695 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1696 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1697 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1698 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1699 bool parseSP3NegModifier();
1700 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1701 LitModifier Lit = LitModifier::None);
1702 ParseStatus parseReg(OperandVector &Operands);
1703 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1704 LitModifier Lit = LitModifier::None);
1705 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1706 bool AllowImm = true);
1707 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1708 bool AllowImm = true);
1709 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1710 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1711 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1712 ParseStatus tryParseIndexKey(OperandVector &Operands,
1713 AMDGPUOperand::ImmTy ImmTy);
1714 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1715 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1716 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1717 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1718 AMDGPUOperand::ImmTy Type);
1719 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1720 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1721 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1722 AMDGPUOperand::ImmTy Type);
1723 ParseStatus parseMatrixAScale(OperandVector &Operands);
1724 ParseStatus parseMatrixBScale(OperandVector &Operands);
1725 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1726 AMDGPUOperand::ImmTy Type);
1727 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1728 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1729
1730 ParseStatus parseDfmtNfmt(int64_t &Format);
1731 ParseStatus parseUfmt(int64_t &Format);
1732 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1733 int64_t &Format);
1734 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1735 int64_t &Format);
1736 ParseStatus parseFORMAT(OperandVector &Operands);
1737 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1738 ParseStatus parseNumericFormat(int64_t &Format);
1739 ParseStatus parseFlatOffset(OperandVector &Operands);
1740 ParseStatus parseR128A16(OperandVector &Operands);
1741 ParseStatus parseBLGP(OperandVector &Operands);
1742 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1743 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1744
1745 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1746
1747 bool parseCnt(int64_t &IntVal);
1748 ParseStatus parseSWaitCnt(OperandVector &Operands);
1749
1750 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1751 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1752 ParseStatus parseDepCtr(OperandVector &Operands);
1753
1754 bool parseDelay(int64_t &Delay);
1755 ParseStatus parseSDelayALU(OperandVector &Operands);
1756
1757 ParseStatus parseHwreg(OperandVector &Operands);
1758
1759private:
1760 struct OperandInfoTy {
1761 SMLoc Loc;
1762 int64_t Val;
1763 bool IsSymbolic = false;
1764 bool IsDefined = false;
1765
1766 OperandInfoTy(int64_t Val) : Val(Val) {}
1767 };
1768
1769 struct StructuredOpField : OperandInfoTy {
1770 StringLiteral Id;
1771 StringLiteral Desc;
1772 unsigned Width;
1773 bool IsDefined = false;
1774
1775 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1776 int64_t Default)
1777 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1778 virtual ~StructuredOpField() = default;
1779
1780 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1781 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1782 return false;
1783 }
1784
1785 virtual bool validate(AMDGPUAsmParser &Parser) const {
1786 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1787 return Error(Parser, "not supported on this GPU");
1788 if (!isUIntN(Width, Val))
1789 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1790 return true;
1791 }
1792 };
1793
1794 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1795 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1796
1797 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1798 bool validateSendMsg(const OperandInfoTy &Msg,
1799 const OperandInfoTy &Op,
1800 const OperandInfoTy &Stream);
1801
1802 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1803 OperandInfoTy &Width);
1804
1805 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1806
1807 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1808 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1809 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1810
1811 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1812 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1813 const OperandVector &Operands) const;
1814 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1815 const OperandVector &Operands) const;
1816 SMLoc getInstLoc(const OperandVector &Operands) const;
1817
1818 bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
1819 const OperandVector &Operands);
1820 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1821 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1822 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1823 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1824 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1825 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1826 bool AsVOPD3);
1827 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1828 bool tryVOPD(const MCInst &Inst);
1829 bool tryVOPD3(const MCInst &Inst);
1830 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1831
1832 bool validateIntClampSupported(const MCInst &Inst);
1833 bool validateMIMGAtomicDMask(const MCInst &Inst);
1834 bool validateMIMGGatherDMask(const MCInst &Inst);
1835 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1836 bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1837 bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1838 bool validateMIMGD16(const MCInst &Inst);
1839 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1840 bool validateTensorR128(const MCInst &Inst);
1841 bool validateMIMGMSAA(const MCInst &Inst);
1842 bool validateOpSel(const MCInst &Inst);
1843 bool validateTrue16OpSel(const MCInst &Inst);
1844 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1845 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1846 bool validateVccOperand(MCRegister Reg) const;
1847 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1848 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1849 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1850 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1851 bool validateAGPRLdSt(const MCInst &Inst) const;
1852 bool validateVGPRAlign(const MCInst &Inst) const;
1853 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1854 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1855 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1856 bool validateDivScale(const MCInst &Inst);
1857 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1858 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1859 SMLoc IDLoc);
1860 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1861 const unsigned CPol);
1862 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1863 bool validateSetVgprMSB(const MCInst &Inst, const OperandVector &Operands);
1864 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1865 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1866 unsigned getConstantBusLimit(unsigned Opcode) const;
1867 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1868 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1869 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1870
1871 bool isSupportedMnemo(StringRef Mnemo,
1872 const FeatureBitset &FBS);
1873 bool isSupportedMnemo(StringRef Mnemo,
1874 const FeatureBitset &FBS,
1875 ArrayRef<unsigned> Variants);
1876 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1877
1878 bool isId(const StringRef Id) const;
1879 bool isId(const AsmToken &Token, const StringRef Id) const;
1880 bool isToken(const AsmToken::TokenKind Kind) const;
1881 StringRef getId() const;
1882 bool trySkipId(const StringRef Id);
1883 bool trySkipId(const StringRef Pref, const StringRef Id);
1884 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1885 bool trySkipToken(const AsmToken::TokenKind Kind);
1886 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1887 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1888 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1889
1890 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1891 AsmToken::TokenKind getTokenKind() const;
1892 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1893 bool parseExpr(OperandVector &Operands);
1894 StringRef getTokenStr() const;
1895 AsmToken peekToken(bool ShouldSkipSpace = true);
1896 AsmToken getToken() const;
1897 SMLoc getLoc() const;
1898 void lex();
1899
1900public:
1901 void onBeginOfFile() override;
1902 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1903
1904 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1905
1906 ParseStatus parseExpTgt(OperandVector &Operands);
1907 ParseStatus parseSendMsg(OperandVector &Operands);
1908 ParseStatus parseInterpSlot(OperandVector &Operands);
1909 ParseStatus parseInterpAttr(OperandVector &Operands);
1910 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1911 ParseStatus parseBoolReg(OperandVector &Operands);
1912
1913 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1914 const unsigned MaxVal, const Twine &ErrMsg,
1915 SMLoc &Loc);
1916 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1917 const unsigned MinVal,
1918 const unsigned MaxVal,
1919 const StringRef ErrMsg);
1920 ParseStatus parseSwizzle(OperandVector &Operands);
1921 bool parseSwizzleOffset(int64_t &Imm);
1922 bool parseSwizzleMacro(int64_t &Imm);
1923 bool parseSwizzleQuadPerm(int64_t &Imm);
1924 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1925 bool parseSwizzleBroadcast(int64_t &Imm);
1926 bool parseSwizzleSwap(int64_t &Imm);
1927 bool parseSwizzleReverse(int64_t &Imm);
1928 bool parseSwizzleFFT(int64_t &Imm);
1929 bool parseSwizzleRotate(int64_t &Imm);
1930
1931 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1932 int64_t parseGPRIdxMacro();
1933
1934 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1935 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1936
1937 ParseStatus parseOModSI(OperandVector &Operands);
1938
1939 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1940 OptionalImmIndexMap &OptionalIdx);
1941 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1942 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1943 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1944 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1945 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1946
1947 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1948 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1949 OptionalImmIndexMap &OptionalIdx);
1950 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1951 OptionalImmIndexMap &OptionalIdx);
1952
1953 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1954 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1955 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
1956
1957 bool parseDimId(unsigned &Encoding);
1958 ParseStatus parseDim(OperandVector &Operands);
1959 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1960 ParseStatus parseDPP8(OperandVector &Operands);
1961 ParseStatus parseDPPCtrl(OperandVector &Operands);
1962 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1963 int64_t parseDPPCtrlSel(StringRef Ctrl);
1964 int64_t parseDPPCtrlPerm();
1965 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1966 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1967 cvtDPP(Inst, Operands, true);
1968 }
1969 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1970 bool IsDPP8 = false);
1971 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1972 cvtVOP3DPP(Inst, Operands, true);
1973 }
1974
1975 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1976 AMDGPUOperand::ImmTy Type);
1977 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1978 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1979 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1980 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1981 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1982 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1983 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1984 uint64_t BasicInstType,
1985 bool SkipDstVcc = false,
1986 bool SkipSrcVcc = false);
1987
1988 ParseStatus parseEndpgm(OperandVector &Operands);
1989
1990 ParseStatus parseVOPD(OperandVector &Operands);
1991};
1992
1993} // end anonymous namespace
1994
1995// May be called with integer type with equivalent bitwidth.
1996static const fltSemantics *getFltSemantics(unsigned Size) {
1997 switch (Size) {
1998 case 4:
1999 return &APFloat::IEEEsingle();
2000 case 8:
2001 return &APFloat::IEEEdouble();
2002 case 2:
2003 return &APFloat::IEEEhalf();
2004 default:
2005 llvm_unreachable("unsupported fp type");
2006 }
2007}
2008
2009 static const fltSemantics *getFltSemantics(MVT VT) {
2010 return getFltSemantics(VT.getSizeInBits() / 8);
2011}
2012
2013 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
2014 switch (OperandType) {
2015 // When a floating-point immediate is used as an operand of type i16, the 32-bit
2016 // representation of the constant, truncated to the 16 LSBs, should be used.
2031 return &APFloat::IEEEsingle();
2038 return &APFloat::IEEEdouble();
2045 return &APFloat::IEEEhalf();
2050 return &APFloat::BFloat();
2051 default:
2052 llvm_unreachable("unsupported fp type");
2053 }
2054}
2055
2056//===----------------------------------------------------------------------===//
2057// Operand
2058//===----------------------------------------------------------------------===//
2059
2060static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2061 bool Lost;
2062
2063 // Convert literal to single precision
2064 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
2065 APFloat::rmNearestTiesToEven,
2066 &Lost);
2067 // We allow precision loss, but not overflow or underflow
2068 if (Status != APFloat::opOK &&
2069 Lost &&
2070 ((Status & APFloat::opOverflow) != 0 ||
2071 (Status & APFloat::opUnderflow) != 0)) {
2072 return false;
2073 }
2074
2075 return true;
2076}
2077
2078static bool isSafeTruncation(int64_t Val, unsigned Size) {
2079 return isUIntN(Size, Val) || isIntN(Size, Val);
2080}
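// Worked examples for isSafeTruncation() above: isSafeTruncation(0xFFFF, 16)
// and isSafeTruncation(-1, 16) both hold (unsigned and signed 16-bit fits,
// respectively), while isSafeTruncation(0x12345, 16) fits neither way.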
2081
2082static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2083 if (VT.getScalarType() == MVT::i16)
2084 return isInlinableLiteral32(Val, HasInv2Pi);
2085
2086 if (VT.getScalarType() == MVT::f16)
2087 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2088
2089 assert(VT.getScalarType() == MVT::bf16);
2090
2091 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2092}
2093
2094bool AMDGPUOperand::isInlinableImm(MVT type) const {
2095
2096 // This is a hack to enable named inline values like
2097 // shared_base with both 32-bit and 64-bit operands.
2098 // Note that these values are defined as
2099 // 32-bit operands only.
2100 if (isInlineValue()) {
2101 return true;
2102 }
2103
2104 if (!isImmTy(ImmTyNone)) {
2105 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2106 return false;
2107 }
2108 // TODO: We should avoid using host float here. It would be better to
2109 // check the float bit values which is what a few other places do.
2110 // We've had bot failures before due to weird NaN support on mips hosts.
2111
2112 APInt Literal(64, Imm.Val);
2113
2114 if (Imm.IsFPImm) { // We got fp literal token
2115 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2116 return AMDGPU::isInlinableLiteral64(Imm.Val,
2117 AsmParser->hasInv2PiInlineImm());
2118 }
2119
2120 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2121 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2122 return false;
2123
2124 if (type.getScalarSizeInBits() == 16) {
2125 bool Lost = false;
2126 switch (type.getScalarType().SimpleTy) {
2127 default:
2128 llvm_unreachable("unknown 16-bit type");
2129 case MVT::bf16:
2130 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2131 &Lost);
2132 break;
2133 case MVT::f16:
2134 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2135 &Lost);
2136 break;
2137 case MVT::i16:
2138 FPLiteral.convert(APFloatBase::IEEEsingle(),
2139 APFloat::rmNearestTiesToEven, &Lost);
2140 break;
2141 }
2142 // We need to use 32-bit representation here because when a floating-point
2143 // inline constant is used as an i16 operand, its 32-bit representation
2144 // will be used. We will need the 32-bit value to check if it is an FP
2145 // inline constant.
2146 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2147 return isInlineableLiteralOp16(ImmVal, type,
2148 AsmParser->hasInv2PiInlineImm());
2149 }
2150
2151 // Check if single precision literal is inlinable
2152 return AMDGPU::isInlinableLiteral32(
2153 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2154 AsmParser->hasInv2PiInlineImm());
2155 }
2156
2157 // We got int literal token.
2158 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2159 return AMDGPU::isInlinableLiteral64(Imm.Val,
2160 AsmParser->hasInv2PiInlineImm());
2161 }
2162
2163 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2164 return false;
2165 }
2166
2167 if (type.getScalarSizeInBits() == 16) {
2168 return isInlineableLiteralOp16(
2169 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2170 type, AsmParser->hasInv2PiInlineImm());
2171 }
2172
2173 return AMDGPU::isInlinableLiteral32(
2174 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2175 AsmParser->hasInv2PiInlineImm());
2176}
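// For reference, the inline constants checked for by isInlinableImm() above
// are the integers -16..64 and the FP values 0.0, +/-0.5, +/-1.0, +/-2.0,
// +/-4.0 (plus 1/(2*pi) on targets with the inv2pi inline immediate), so e.g.
// "v_add_f32 v0, 1.0, v1" needs no literal while "v_add_f32 v0, 1.5, v1" does.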
2177
2178bool AMDGPUOperand::isLiteralImm(MVT type) const {
2179 // Check that this immediate can be added as literal
2180 if (!isImmTy(ImmTyNone)) {
2181 return false;
2182 }
2183
2184 bool Allow64Bit =
2185 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2186
2187 if (!Imm.IsFPImm) {
2188 // We got int literal token.
2189
2190 if (type == MVT::f64 && hasFPModifiers()) {
2191 // Cannot apply fp modifiers to int literals preserving the same semantics
2192 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2193 // disable these cases.
2194 return false;
2195 }
2196
2197 unsigned Size = type.getSizeInBits();
2198 if (Size == 64) {
2199 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2200 return true;
2201 Size = 32;
2202 }
2203
2204 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2205 // types.
2206 return isSafeTruncation(Imm.Val, Size);
2207 }
2208
2209 // We got fp literal token
2210 if (type == MVT::f64) { // Expected 64-bit fp operand
2211 // We would set the low 64 bits of the literal to zeroes, but we accept such literals
2212 return true;
2213 }
2214
2215 if (type == MVT::i64) { // Expected 64-bit int operand
2216 // We don't allow fp literals in 64-bit integer instructions. It is
2217 // unclear how we should encode them.
2218 return false;
2219 }
2220
2221 // We allow fp literals with f16x2 operands assuming that the specified
2222 // literal goes into the lower half and the upper half is zero. We also
2223 // require that the literal may be losslessly converted to f16.
2224 //
2225 // For i16x2 operands, we assume that the specified literal is encoded as a
2226 // single-precision float. This is pretty odd, but it matches SP3 and what
2227 // happens in hardware.
2228 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2229 : (type == MVT::v2i16) ? MVT::f32
2230 : (type == MVT::v2f32) ? MVT::f32
2231 : type;
2232
2233 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2234 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2235}
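// For example, isLiteralImm() accepts an fp literal such as 0.5 for a v2f16
// operand (converted to f16 and placed in the low half, upper half zero),
// while for a v2i16 operand the same literal is checked against f32 encoding,
// matching the SP3 behaviour described above.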
2236
2237bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2238 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2239}
2240
2241bool AMDGPUOperand::isVRegWithInputMods() const {
2242 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2243 // GFX90A allows DPP on 64-bit operands.
2244 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2245 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2246}
2247
2248template <bool IsFake16>
2249bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2250 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2251 : AMDGPU::VGPR_16_Lo128RegClassID);
2252}
2253
2254template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2255 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2256 : AMDGPU::VGPR_16RegClassID);
2257}
2258
2259bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2260 if (AsmParser->isVI())
2261 return isVReg32();
2262 if (AsmParser->isGFX9Plus())
2263 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2264 return false;
2265}
2266
2267bool AMDGPUOperand::isSDWAFP16Operand() const {
2268 return isSDWAOperand(MVT::f16);
2269}
2270
2271bool AMDGPUOperand::isSDWAFP32Operand() const {
2272 return isSDWAOperand(MVT::f32);
2273}
2274
2275bool AMDGPUOperand::isSDWAInt16Operand() const {
2276 return isSDWAOperand(MVT::i16);
2277}
2278
2279bool AMDGPUOperand::isSDWAInt32Operand() const {
2280 return isSDWAOperand(MVT::i32);
2281}
2282
2283bool AMDGPUOperand::isBoolReg() const {
2284 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2285 (AsmParser->isWave32() && isSCSrc_b32()));
2286}
2287
2288uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2289{
2290 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2291 assert(Size == 2 || Size == 4 || Size == 8);
2292
2293 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2294
2295 if (Imm.Mods.Abs) {
2296 Val &= ~FpSignMask;
2297 }
2298 if (Imm.Mods.Neg) {
2299 Val ^= FpSignMask;
2300 }
2301
2302 return Val;
2303}
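// Worked example for applyInputFPModifiers() above with Size == 4 (sign bit
// 31): 'abs' applied to 0xBF800000 (-1.0f) yields 0x3F800000 (1.0f), and
// 'neg' applied to 0x3F800000 yields 0xBF800000.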
2304
2305void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2306 MCOpIdx = Inst.getNumOperands();
2307
2308 if (isExpr()) {
2309 Inst.addOperand(MCOperand::createExpr(Expr));
2310 return;
2311 }
2312
2313 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2314 Inst.getNumOperands())) {
2315 addLiteralImmOperand(Inst, Imm.Val,
2316 ApplyModifiers &
2317 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2318 } else {
2319 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2320 Inst.addOperand(MCOperand::createImm(Imm.Val));
2321 }
2322}
2323
2324void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2325 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2326 auto OpNum = Inst.getNumOperands();
2327 // Check that this operand accepts literals
2328 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2329
2330 if (ApplyModifiers) {
2331 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2332 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2333 Val = applyInputFPModifiers(Val, Size);
2334 }
2335
2336 APInt Literal(64, Val);
2337 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2338
2339 bool CanUse64BitLiterals =
2340 AsmParser->has64BitLiterals() &&
2341 !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
2342 MCContext &Ctx = AsmParser->getContext();
2343
2344 if (Imm.IsFPImm) { // We got fp literal token
2345 switch (OpTy) {
2351 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2352 AsmParser->hasInv2PiInlineImm())) {
2353 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2354 return;
2355 }
2356
2357 // Non-inlineable
2358 if (AMDGPU::isSISrcFPOperand(InstDesc,
2359 OpNum)) { // Expected 64-bit fp operand
2360 bool HasMandatoryLiteral =
2361 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2362 // For fp operands we check if low 32 bits are zeros
2363 if (Literal.getLoBits(32) != 0 &&
2364 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2365 !HasMandatoryLiteral) {
2366 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2367 Inst.getLoc(),
2368 "Can't encode literal as exact 64-bit floating-point operand. "
2369 "Low 32-bits will be set to zero");
2370 Val &= 0xffffffff00000000u;
2371 }
2372
2373 if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
2376 CanUse64BitLiterals && Lo_32(Val) != 0) {
2377 Inst.addOperand(MCOperand::createExpr(
2378 AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
2379 } else {
2380 Inst.addOperand(MCOperand::createImm(Val));
2381 }
2382 return;
2383 }
2384
2385 // We don't allow fp literals in 64-bit integer instructions. It is
2386 // unclear how we should encode them. This case should be checked earlier
2387 // in predicate methods (isLiteralImm())
2388 llvm_unreachable("fp literal in 64-bit integer instruction.");
2389
2391 if (CanUse64BitLiterals && Lo_32(Val) != 0) {
2392 Inst.addOperand(MCOperand::createExpr(
2393 AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
2394 } else {
2395 Inst.addOperand(MCOperand::createImm(Val));
2396 }
2397 return;
2398
2403 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2404 // This is 1/(2*pi), which is going to be truncated to bf16 with a
2405 // loss of precision. The constant represents the idiomatic fp32 value of
2406 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16
2407 // bits cleared. Prevent rounding below.
2408 Inst.addOperand(MCOperand::createImm(0x3e22));
2409 return;
2410 }
2411 [[fallthrough]];
2412
2433 bool lost;
2434 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2435 // Convert literal to single precision
2436 FPLiteral.convert(*getOpFltSemantics(OpTy),
2437 APFloat::rmNearestTiesToEven, &lost);
2438 // We allow precision loss, but not overflow or underflow. This should have
2439 // been checked earlier in isLiteralImm()
2440
2441 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2442 Inst.addOperand(MCOperand::createImm(ImmVal));
2443 return;
2444 }
2445 default:
2446 llvm_unreachable("invalid operand size");
2447 }
2448
2449 return;
2450 }
2451
2452 // We got int literal token.
2453 // Only sign extend inline immediates.
2454 switch (OpTy) {
2469 return;
2470
2473 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2474 Inst.addOperand(MCOperand::createImm(Val));
2475 return;
2476 }
2477
2478 // When the 32 MSBs are not zero (effectively means it can't be safely
2479 // truncated to uint32_t), if the target doesn't support 64-bit literals, or
2480 // the lit modifier is explicitly used, we need to truncate it to the 32
2481 // LSBs.
2482 if (!AsmParser->has64BitLiterals() ||
2483 getModifiers().Lit == LitModifier::Lit)
2484 Val = Lo_32(Val);
2485
2486 if (CanUse64BitLiterals && (!isInt<32>(Val) || !isUInt<32>(Val))) {
2487 Inst.addOperand(MCOperand::createExpr(
2488 AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
2489 } else {
2490 Inst.addOperand(MCOperand::createImm(Val));
2491 }
2492 return;
2493
2497 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2498 Inst.addOperand(MCOperand::createImm(Val));
2499 return;
2500 }
2501
2502 // If the target doesn't support 64-bit literals, we need to use the
2503 // constant as the high 32 MSBs of a double-precision floating point value.
2504 if (!AsmParser->has64BitLiterals()) {
2505 Val = static_cast<uint64_t>(Val) << 32;
2506 } else {
2507 // Now that the target does support 64-bit literals, there are two cases
2508 // where we still want to use src_literal encoding:
2509 // 1) explicitly forced by using lit modifier;
2510 // 2) the value is a valid 32-bit representation (signed or unsigned),
2511 // meanwhile not forced by lit64 modifier.
2512 if (getModifiers().Lit == LitModifier::Lit ||
2513 (getModifiers().Lit != LitModifier::Lit64 &&
2514 (isInt<32>(Val) || isUInt<32>(Val))))
2515 Val = static_cast<uint64_t>(Val) << 32;
2516 }
2517
2518 if (CanUse64BitLiterals && Lo_32(Val) != 0) {
2519 Inst.addOperand(MCOperand::createExpr(
2520 AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
2521 } else {
2522 Inst.addOperand(MCOperand::createImm(Val));
2523 }
2524 return;
2525
2538 return;
2539
2541 if ((isInt<32>(Val) || isUInt<32>(Val)) &&
2542 getModifiers().Lit != LitModifier::Lit64)
2543 Val <<= 32;
2544
2545 if (CanUse64BitLiterals && Lo_32(Val) != 0) {
2546 Inst.addOperand(MCOperand::createExpr(
2547 AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
2548 } else {
2549 Inst.addOperand(MCOperand::createImm(Val));
2550 }
2551 return;
2552
2553 default:
2554 llvm_unreachable("invalid operand type");
2555 }
2556}
2557
2558void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2559 MCOpIdx = Inst.getNumOperands();
2560 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2561}
2562
2563bool AMDGPUOperand::isInlineValue() const {
2564 return isRegKind() && ::isInlineValue(getReg());
2565}
2566
2567//===----------------------------------------------------------------------===//
2568// AsmParser
2569//===----------------------------------------------------------------------===//
2570
2571void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2572 // TODO: make these pre-defined variables read-only.
2573 // Currently there is no suitable machinery in core llvm-mc for this.
2574 // MCSymbol::isRedefinable is intended for another purpose, and
2575 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2576 MCContext &Ctx = getContext();
2577 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2578 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2579}
2580
2581static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2582 if (Is == IS_VGPR) {
2583 switch (RegWidth) {
2584 default: return -1;
2585 case 32:
2586 return AMDGPU::VGPR_32RegClassID;
2587 case 64:
2588 return AMDGPU::VReg_64RegClassID;
2589 case 96:
2590 return AMDGPU::VReg_96RegClassID;
2591 case 128:
2592 return AMDGPU::VReg_128RegClassID;
2593 case 160:
2594 return AMDGPU::VReg_160RegClassID;
2595 case 192:
2596 return AMDGPU::VReg_192RegClassID;
2597 case 224:
2598 return AMDGPU::VReg_224RegClassID;
2599 case 256:
2600 return AMDGPU::VReg_256RegClassID;
2601 case 288:
2602 return AMDGPU::VReg_288RegClassID;
2603 case 320:
2604 return AMDGPU::VReg_320RegClassID;
2605 case 352:
2606 return AMDGPU::VReg_352RegClassID;
2607 case 384:
2608 return AMDGPU::VReg_384RegClassID;
2609 case 512:
2610 return AMDGPU::VReg_512RegClassID;
2611 case 1024:
2612 return AMDGPU::VReg_1024RegClassID;
2613 }
2614 } else if (Is == IS_TTMP) {
2615 switch (RegWidth) {
2616 default: return -1;
2617 case 32:
2618 return AMDGPU::TTMP_32RegClassID;
2619 case 64:
2620 return AMDGPU::TTMP_64RegClassID;
2621 case 128:
2622 return AMDGPU::TTMP_128RegClassID;
2623 case 256:
2624 return AMDGPU::TTMP_256RegClassID;
2625 case 512:
2626 return AMDGPU::TTMP_512RegClassID;
2627 }
2628 } else if (Is == IS_SGPR) {
2629 switch (RegWidth) {
2630 default: return -1;
2631 case 32:
2632 return AMDGPU::SGPR_32RegClassID;
2633 case 64:
2634 return AMDGPU::SGPR_64RegClassID;
2635 case 96:
2636 return AMDGPU::SGPR_96RegClassID;
2637 case 128:
2638 return AMDGPU::SGPR_128RegClassID;
2639 case 160:
2640 return AMDGPU::SGPR_160RegClassID;
2641 case 192:
2642 return AMDGPU::SGPR_192RegClassID;
2643 case 224:
2644 return AMDGPU::SGPR_224RegClassID;
2645 case 256:
2646 return AMDGPU::SGPR_256RegClassID;
2647 case 288:
2648 return AMDGPU::SGPR_288RegClassID;
2649 case 320:
2650 return AMDGPU::SGPR_320RegClassID;
2651 case 352:
2652 return AMDGPU::SGPR_352RegClassID;
2653 case 384:
2654 return AMDGPU::SGPR_384RegClassID;
2655 case 512:
2656 return AMDGPU::SGPR_512RegClassID;
2657 }
2658 } else if (Is == IS_AGPR) {
2659 switch (RegWidth) {
2660 default: return -1;
2661 case 32:
2662 return AMDGPU::AGPR_32RegClassID;
2663 case 64:
2664 return AMDGPU::AReg_64RegClassID;
2665 case 96:
2666 return AMDGPU::AReg_96RegClassID;
2667 case 128:
2668 return AMDGPU::AReg_128RegClassID;
2669 case 160:
2670 return AMDGPU::AReg_160RegClassID;
2671 case 192:
2672 return AMDGPU::AReg_192RegClassID;
2673 case 224:
2674 return AMDGPU::AReg_224RegClassID;
2675 case 256:
2676 return AMDGPU::AReg_256RegClassID;
2677 case 288:
2678 return AMDGPU::AReg_288RegClassID;
2679 case 320:
2680 return AMDGPU::AReg_320RegClassID;
2681 case 352:
2682 return AMDGPU::AReg_352RegClassID;
2683 case 384:
2684 return AMDGPU::AReg_384RegClassID;
2685 case 512:
2686 return AMDGPU::AReg_512RegClassID;
2687 case 1024:
2688 return AMDGPU::AReg_1024RegClassID;
2689 }
2690 }
2691 return -1;
2692}
2693
2696 .Case("exec", AMDGPU::EXEC)
2697 .Case("vcc", AMDGPU::VCC)
2698 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2699 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2700 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2701 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2702 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2703 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2704 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2705 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2706 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2707 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2708 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2709 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2710 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2711 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2712 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2713 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2714 .Case("m0", AMDGPU::M0)
2715 .Case("vccz", AMDGPU::SRC_VCCZ)
2716 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2717 .Case("execz", AMDGPU::SRC_EXECZ)
2718 .Case("src_execz", AMDGPU::SRC_EXECZ)
2719 .Case("scc", AMDGPU::SRC_SCC)
2720 .Case("src_scc", AMDGPU::SRC_SCC)
2721 .Case("tba", AMDGPU::TBA)
2722 .Case("tma", AMDGPU::TMA)
2723 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2724 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2725 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2726 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2727 .Case("vcc_lo", AMDGPU::VCC_LO)
2728 .Case("vcc_hi", AMDGPU::VCC_HI)
2729 .Case("exec_lo", AMDGPU::EXEC_LO)
2730 .Case("exec_hi", AMDGPU::EXEC_HI)
2731 .Case("tma_lo", AMDGPU::TMA_LO)
2732 .Case("tma_hi", AMDGPU::TMA_HI)
2733 .Case("tba_lo", AMDGPU::TBA_LO)
2734 .Case("tba_hi", AMDGPU::TBA_HI)
2735 .Case("pc", AMDGPU::PC_REG)
2736 .Case("null", AMDGPU::SGPR_NULL)
2737 .Default(AMDGPU::NoRegister);
2738}
2739
2740bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2741 SMLoc &EndLoc, bool RestoreOnFailure) {
2742 auto R = parseRegister();
2743 if (!R) return true;
2744 assert(R->isReg());
2745 RegNo = R->getReg();
2746 StartLoc = R->getStartLoc();
2747 EndLoc = R->getEndLoc();
2748 return false;
2749}
2750
2751bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2752 SMLoc &EndLoc) {
2753 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2754}
2755
2756ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2757 SMLoc &EndLoc) {
2758 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2759 bool PendingErrors = getParser().hasPendingError();
2760 getParser().clearPendingErrors();
2761 if (PendingErrors)
2762 return ParseStatus::Failure;
2763 if (Result)
2764 return ParseStatus::NoMatch;
2765 return ParseStatus::Success;
2766}
2767
2768bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2769 RegisterKind RegKind,
2770 MCRegister Reg1, SMLoc Loc) {
2771 switch (RegKind) {
2772 case IS_SPECIAL:
2773 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2774 Reg = AMDGPU::EXEC;
2775 RegWidth = 64;
2776 return true;
2777 }
2778 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2779 Reg = AMDGPU::FLAT_SCR;
2780 RegWidth = 64;
2781 return true;
2782 }
2783 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2784 Reg = AMDGPU::XNACK_MASK;
2785 RegWidth = 64;
2786 return true;
2787 }
2788 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2789 Reg = AMDGPU::VCC;
2790 RegWidth = 64;
2791 return true;
2792 }
2793 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2794 Reg = AMDGPU::TBA;
2795 RegWidth = 64;
2796 return true;
2797 }
2798 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2799 Reg = AMDGPU::TMA;
2800 RegWidth = 64;
2801 return true;
2802 }
2803 Error(Loc, "register does not fit in the list");
2804 return false;
2805 case IS_VGPR:
2806 case IS_SGPR:
2807 case IS_AGPR:
2808 case IS_TTMP:
2809 if (Reg1 != Reg + RegWidth / 32) {
2810 Error(Loc, "registers in a list must have consecutive indices");
2811 return false;
2812 }
2813 RegWidth += 32;
2814 return true;
2815 default:
2816 llvm_unreachable("unexpected register kind");
2817 }
2818}
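// For example, AddNextRegisterToList() folds the list [exec_lo, exec_hi] into
// the 64-bit 'exec' register, extends [v0, v1] to a VGPR pair of width 64, and
// rejects [v0, v2] because the indices are not consecutive.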
2819
2820 struct RegInfo {
2821 StringLiteral Name;
2822 RegisterKind Kind;
2823};
2824
2825static constexpr RegInfo RegularRegisters[] = {
2826 {{"v"}, IS_VGPR},
2827 {{"s"}, IS_SGPR},
2828 {{"ttmp"}, IS_TTMP},
2829 {{"acc"}, IS_AGPR},
2830 {{"a"}, IS_AGPR},
2831};
2832
2833static bool isRegularReg(RegisterKind Kind) {
2834 return Kind == IS_VGPR ||
2835 Kind == IS_SGPR ||
2836 Kind == IS_TTMP ||
2837 Kind == IS_AGPR;
2838}
2839
2840 static const RegInfo *getRegularRegInfo(StringRef Str) {
2841 for (const RegInfo &Reg : RegularRegisters)
2842 if (Str.starts_with(Reg.Name))
2843 return &Reg;
2844 return nullptr;
2845}
2846
2847static bool getRegNum(StringRef Str, unsigned& Num) {
2848 return !Str.getAsInteger(10, Num);
2849}
2850
2851bool
2852AMDGPUAsmParser::isRegister(const AsmToken &Token,
2853 const AsmToken &NextToken) const {
2854
2855 // A list of consecutive registers: [s0,s1,s2,s3]
2856 if (Token.is(AsmToken::LBrac))
2857 return true;
2858
2859 if (!Token.is(AsmToken::Identifier))
2860 return false;
2861
2862 // A single register like s0 or a range of registers like s[0:1]
2863
2864 StringRef Str = Token.getString();
2865 const RegInfo *Reg = getRegularRegInfo(Str);
2866 if (Reg) {
2867 StringRef RegName = Reg->Name;
2868 StringRef RegSuffix = Str.substr(RegName.size());
2869 if (!RegSuffix.empty()) {
2870 RegSuffix.consume_back(".l");
2871 RegSuffix.consume_back(".h");
2872 unsigned Num;
2873 // A single register with an index: rXX
2874 if (getRegNum(RegSuffix, Num))
2875 return true;
2876 } else {
2877 // A range of registers: r[XX:YY].
2878 if (NextToken.is(AsmToken::LBrac))
2879 return true;
2880 }
2881 }
2882
2883 return getSpecialRegForName(Str).isValid();
2884}
2885
2886bool
2887AMDGPUAsmParser::isRegister()
2888{
2889 return isRegister(getToken(), peekToken());
2890}
2891
2892MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2893 unsigned SubReg, unsigned RegWidth,
2894 SMLoc Loc) {
2895 assert(isRegularReg(RegKind));
2896
2897 unsigned AlignSize = 1;
2898 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2899 // SGPR and TTMP registers must be aligned.
2900 // Max required alignment is 4 dwords.
2901 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2902 }
2903
2904 if (RegNum % AlignSize != 0) {
2905 Error(Loc, "invalid register alignment");
2906 return MCRegister();
2907 }
2908
2909 unsigned RegIdx = RegNum / AlignSize;
2910 int RCID = getRegClass(RegKind, RegWidth);
2911 if (RCID == -1) {
2912 Error(Loc, "invalid or unsupported register size");
2913 return MCRegister();
2914 }
2915
2916 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2917 const MCRegisterClass RC = TRI->getRegClass(RCID);
2918 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2919 Error(Loc, "register index is out of range");
2920 return AMDGPU::NoRegister;
2921 }
2922
2923 if (RegKind == IS_VGPR && !isGFX1250() && RegIdx + RegWidth / 32 > 256) {
2924 Error(Loc, "register index is out of range");
2925 return MCRegister();
2926 }
2927
2928 MCRegister Reg = RC.getRegister(RegIdx);
2929
2930 if (SubReg) {
2931 Reg = TRI->getSubReg(Reg, SubReg);
2932
2933 // Currently all regular registers have their .l and .h subregisters, so
2934 // we should never need to generate an error here.
2935 assert(Reg && "Invalid subregister!");
2936 }
2937
2938 return Reg;
2939}
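// For example, a 128-bit SGPR tuple requires 4-dword alignment, so s[4:7]
// maps to an SGPR_128 register while s[2:5] is rejected by getRegularReg()
// with "invalid register alignment"; VGPR and AGPR tuples need no alignment.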
2940
2941bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2942 unsigned &SubReg) {
2943 int64_t RegLo, RegHi;
2944 if (!skipToken(AsmToken::LBrac, "missing register index"))
2945 return false;
2946
2947 SMLoc FirstIdxLoc = getLoc();
2948 SMLoc SecondIdxLoc;
2949
2950 if (!parseExpr(RegLo))
2951 return false;
2952
2953 if (trySkipToken(AsmToken::Colon)) {
2954 SecondIdxLoc = getLoc();
2955 if (!parseExpr(RegHi))
2956 return false;
2957 } else {
2958 RegHi = RegLo;
2959 }
2960
2961 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2962 return false;
2963
2964 if (!isUInt<32>(RegLo)) {
2965 Error(FirstIdxLoc, "invalid register index");
2966 return false;
2967 }
2968
2969 if (!isUInt<32>(RegHi)) {
2970 Error(SecondIdxLoc, "invalid register index");
2971 return false;
2972 }
2973
2974 if (RegLo > RegHi) {
2975 Error(FirstIdxLoc, "first register index should not exceed second index");
2976 return false;
2977 }
2978
2979 if (RegHi == RegLo) {
2980 StringRef RegSuffix = getTokenStr();
2981 if (RegSuffix == ".l") {
2982 SubReg = AMDGPU::lo16;
2983 lex();
2984 } else if (RegSuffix == ".h") {
2985 SubReg = AMDGPU::hi16;
2986 lex();
2987 }
2988 }
2989
2990 Num = static_cast<unsigned>(RegLo);
2991 RegWidth = 32 * ((RegHi - RegLo) + 1);
2992
2993 return true;
2994}
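// For example, ParseRegRange() turns "[0:3]" into Num = 0, RegWidth = 128 and
// "[5]" into Num = 5, RegWidth = 32; a single-index range may also carry a
// ".l" or ".h" suffix to select a 16-bit subregister.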
2995
2996MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2997 unsigned &RegNum,
2998 unsigned &RegWidth,
2999 SmallVectorImpl<AsmToken> &Tokens) {
3000 assert(isToken(AsmToken::Identifier));
3001 MCRegister Reg = getSpecialRegForName(getTokenStr());
3002 if (Reg) {
3003 RegNum = 0;
3004 RegWidth = 32;
3005 RegKind = IS_SPECIAL;
3006 Tokens.push_back(getToken());
3007 lex(); // skip register name
3008 }
3009 return Reg;
3010}
3011
3012MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3013 unsigned &RegNum,
3014 unsigned &RegWidth,
3015 SmallVectorImpl<AsmToken> &Tokens) {
3016 assert(isToken(AsmToken::Identifier));
3017 StringRef RegName = getTokenStr();
3018 auto Loc = getLoc();
3019
3020 const RegInfo *RI = getRegularRegInfo(RegName);
3021 if (!RI) {
3022 Error(Loc, "invalid register name");
3023 return MCRegister();
3024 }
3025
3026 Tokens.push_back(getToken());
3027 lex(); // skip register name
3028
3029 RegKind = RI->Kind;
3030 StringRef RegSuffix = RegName.substr(RI->Name.size());
3031 unsigned SubReg = NoSubRegister;
3032 if (!RegSuffix.empty()) {
3033 if (RegSuffix.consume_back(".l"))
3034 SubReg = AMDGPU::lo16;
3035 else if (RegSuffix.consume_back(".h"))
3036 SubReg = AMDGPU::hi16;
3037
3038 // Single 32-bit register: vXX.
3039 if (!getRegNum(RegSuffix, RegNum)) {
3040 Error(Loc, "invalid register index");
3041 return MCRegister();
3042 }
3043 RegWidth = 32;
3044 } else {
3045 // Range of registers: v[XX:YY]. ":YY" is optional.
3046 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3047 return MCRegister();
3048 }
3049
3050 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3051}
3052
3053MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3054 unsigned &RegNum, unsigned &RegWidth,
3055 SmallVectorImpl<AsmToken> &Tokens) {
3056 MCRegister Reg;
3057 auto ListLoc = getLoc();
3058
3059 if (!skipToken(AsmToken::LBrac,
3060 "expected a register or a list of registers")) {
3061 return MCRegister();
3062 }
3063
3064 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3065
3066 auto Loc = getLoc();
3067 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3068 return MCRegister();
3069 if (RegWidth != 32) {
3070 Error(Loc, "expected a single 32-bit register");
3071 return MCRegister();
3072 }
3073
3074 for (; trySkipToken(AsmToken::Comma); ) {
3075 RegisterKind NextRegKind;
3076 MCRegister NextReg;
3077 unsigned NextRegNum, NextRegWidth;
3078 Loc = getLoc();
3079
3080 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3081 NextRegNum, NextRegWidth,
3082 Tokens)) {
3083 return MCRegister();
3084 }
3085 if (NextRegWidth != 32) {
3086 Error(Loc, "expected a single 32-bit register");
3087 return MCRegister();
3088 }
3089 if (NextRegKind != RegKind) {
3090 Error(Loc, "registers in a list must be of the same kind");
3091 return MCRegister();
3092 }
3093 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3094 return MCRegister();
3095 }
3096
3097 if (!skipToken(AsmToken::RBrac,
3098 "expected a comma or a closing square bracket")) {
3099 return MCRegister();
3100 }
3101
3102 if (isRegularReg(RegKind))
3103 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3104
3105 return Reg;
3106}
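// For example, ParseRegList() parses "[s0,s1,s2,s3]" to the same register as
// "s[0:3]"; every element must be a single 32-bit register of the same kind
// with consecutive indices.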
3107
3108bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3109 MCRegister &Reg, unsigned &RegNum,
3110 unsigned &RegWidth,
3111 SmallVectorImpl<AsmToken> &Tokens) {
3112 auto Loc = getLoc();
3113 Reg = MCRegister();
3114
3115 if (isToken(AsmToken::Identifier)) {
3116 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3117 if (!Reg)
3118 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3119 } else {
3120 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3121 }
3122
3123 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3124 if (!Reg) {
3125 assert(Parser.hasPendingError());
3126 return false;
3127 }
3128
3129 if (!subtargetHasRegister(*TRI, Reg)) {
3130 if (Reg == AMDGPU::SGPR_NULL) {
3131 Error(Loc, "'null' operand is not supported on this GPU");
3132 } else {
3133 Error(Loc, Twine(TRI->getName(Reg)) +
3134 " register not available on this GPU");
3135 }
3136 return false;
3137 }
3138
3139 return true;
3140}
3141
3142bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3143 MCRegister &Reg, unsigned &RegNum,
3144 unsigned &RegWidth,
3145 bool RestoreOnFailure /*=false*/) {
3146 Reg = MCRegister();
3147
3148 SmallVector<AsmToken, 1> Tokens;
3149 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3150 if (RestoreOnFailure) {
3151 while (!Tokens.empty()) {
3152 getLexer().UnLex(Tokens.pop_back_val());
3153 }
3154 }
3155 return true;
3156 }
3157 return false;
3158}
3159
3160std::optional<StringRef>
3161AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3162 switch (RegKind) {
3163 case IS_VGPR:
3164 return StringRef(".amdgcn.next_free_vgpr");
3165 case IS_SGPR:
3166 return StringRef(".amdgcn.next_free_sgpr");
3167 default:
3168 return std::nullopt;
3169 }
3170}
3171
3172void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3173 auto SymbolName = getGprCountSymbolName(RegKind);
3174 assert(SymbolName && "initializing invalid register kind");
3175 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3176 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3177 Sym->setRedefinable(true);
3178}
3179
3180bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3181 unsigned DwordRegIndex,
3182 unsigned RegWidth) {
3183 // Symbols are only defined for GCN targets
3184 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3185 return true;
3186
3187 auto SymbolName = getGprCountSymbolName(RegKind);
3188 if (!SymbolName)
3189 return true;
3190 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3191
3192 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3193 int64_t OldCount;
3194
3195 if (!Sym->isVariable())
3196 return !Error(getLoc(),
3197 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3198 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3199 return !Error(
3200 getLoc(),
3201 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3202
3203 if (OldCount <= NewMax)
3204 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3205
3206 return true;
3207}
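// For example, after parsing v[4:7] updateGprCountSymbols() raises
// .amdgcn.next_free_vgpr to at least 8 (highest used VGPR index plus one);
// SGPR uses update .amdgcn.next_free_sgpr the same way.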
3208
3209std::unique_ptr<AMDGPUOperand>
3210AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3211 const auto &Tok = getToken();
3212 SMLoc StartLoc = Tok.getLoc();
3213 SMLoc EndLoc = Tok.getEndLoc();
3214 RegisterKind RegKind;
3215 MCRegister Reg;
3216 unsigned RegNum, RegWidth;
3217
3218 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3219 return nullptr;
3220 }
3221 if (isHsaAbi(getSTI())) {
3222 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3223 return nullptr;
3224 } else
3225 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3226 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3227}
3228
3229ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3230 bool HasSP3AbsModifier, LitModifier Lit) {
3231 // TODO: add syntactic sugar for 1/(2*PI)
3232
3233 if (isRegister() || isModifier())
3234 return ParseStatus::NoMatch;
3235
3236 if (Lit == LitModifier::None) {
3237 if (trySkipId("lit"))
3238 Lit = LitModifier::Lit;
3239 else if (trySkipId("lit64"))
3240 Lit = LitModifier::Lit64;
3241
3242 if (Lit != LitModifier::None) {
3243 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3244 return ParseStatus::Failure;
3245 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3246 if (S.isSuccess() &&
3247 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3248 return ParseStatus::Failure;
3249 return S;
3250 }
3251 }
3252
3253 const auto& Tok = getToken();
3254 const auto& NextTok = peekToken();
3255 bool IsReal = Tok.is(AsmToken::Real);
3256 SMLoc S = getLoc();
3257 bool Negate = false;
3258
3259 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3260 lex();
3261 IsReal = true;
3262 Negate = true;
3263 }
3264
3265 AMDGPUOperand::Modifiers Mods;
3266 Mods.Lit = Lit;
3267
3268 if (IsReal) {
3269 // Floating-point expressions are not supported.
3270 // Can only allow floating-point literals with an
3271 // optional sign.
3272
3273 StringRef Num = getTokenStr();
3274 lex();
3275
3276 APFloat RealVal(APFloat::IEEEdouble());
3277 auto roundMode = APFloat::rmNearestTiesToEven;
3278 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3279 return ParseStatus::Failure;
3280 if (Negate)
3281 RealVal.changeSign();
3282
3283 Operands.push_back(
3284 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3285 AMDGPUOperand::ImmTyNone, true));
3286 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3287 Op.setModifiers(Mods);
3288
3289 return ParseStatus::Success;
3290
3291 } else {
3292 int64_t IntVal;
3293 const MCExpr *Expr;
3294 SMLoc S = getLoc();
3295
3296 if (HasSP3AbsModifier) {
3297 // This is a workaround for handling expressions
3298 // as arguments of SP3 'abs' modifier, for example:
3299 // |1.0|
3300 // |-1|
3301 // |1+x|
3302 // This syntax is not compatible with syntax of standard
3303 // MC expressions (due to the trailing '|').
3304 SMLoc EndLoc;
3305 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3306 return ParseStatus::Failure;
3307 } else {
3308 if (Parser.parseExpression(Expr))
3309 return ParseStatus::Failure;
3310 }
3311
3312 if (Expr->evaluateAsAbsolute(IntVal)) {
3313 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3314 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3315 Op.setModifiers(Mods);
3316 } else {
3317 if (Lit != LitModifier::None)
3318 return ParseStatus::NoMatch;
3319 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3320 }
3321
3322 return ParseStatus::Success;
3323 }
3324
3325 return ParseStatus::NoMatch;
3326}
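// For example, "v_add_f32 v0, lit(1.0), v1" forces the constant to be encoded
// as a literal rather than an inline constant, and lit64(...) requests a
// 64-bit literal encoding on targets that support it.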
3327
3328ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3329 if (!isRegister())
3330 return ParseStatus::NoMatch;
3331
3332 if (auto R = parseRegister()) {
3333 assert(R->isReg());
3334 Operands.push_back(std::move(R));
3335 return ParseStatus::Success;
3336 }
3337 return ParseStatus::Failure;
3338}
3339
3340ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3341 bool HasSP3AbsMod, LitModifier Lit) {
3342 ParseStatus Res = parseReg(Operands);
3343 if (!Res.isNoMatch())
3344 return Res;
3345 if (isModifier())
3346 return ParseStatus::NoMatch;
3347 return parseImm(Operands, HasSP3AbsMod, Lit);
3348}
3349
3350bool
3351AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3352 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3353 const auto &str = Token.getString();
3354 return str == "abs" || str == "neg" || str == "sext";
3355 }
3356 return false;
3357}
3358
3359bool
3360AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3361 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3362}
3363
3364bool
3365AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3366 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3367}
3368
3369bool
3370AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3371 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3372}
3373
3374 // Check if this is an operand modifier or an opcode modifier
3375 // which may look like an expression but is not. We should
3376 // avoid parsing these modifiers as expressions. Currently
3377// recognized sequences are:
3378// |...|
3379// abs(...)
3380// neg(...)
3381// sext(...)
3382// -reg
3383// -|...|
3384// -abs(...)
3385// name:...
3386//
3387bool
3388AMDGPUAsmParser::isModifier() {
3389
3390 AsmToken Tok = getToken();
3391 AsmToken NextToken[2];
3392 peekTokens(NextToken);
3393
3394 return isOperandModifier(Tok, NextToken[0]) ||
3395 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3396 isOpcodeModifierWithVal(Tok, NextToken[0]);
3397}
3398
3399// Check if the current token is an SP3 'neg' modifier.
3400// Currently this modifier is allowed in the following context:
3401//
3402// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3403// 2. Before an 'abs' modifier: -abs(...)
3404// 3. Before an SP3 'abs' modifier: -|...|
3405//
3406// In all other cases "-" is handled as a part
3407// of an expression that follows the sign.
3408//
3409// Note: When "-" is followed by an integer literal,
3410// this is interpreted as integer negation rather
3411// than a floating-point NEG modifier applied to N.
3412 // Besides being counter-intuitive, such use of the floating-point
3413 // NEG modifier would have resulted in different meanings
3414 // of integer literals used with VOP1/2/C and VOP3,
3415// for example:
3416// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3417// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3418// Negative fp literals with preceding "-" are
3419// handled likewise for uniformity
3420//
3421bool
3422AMDGPUAsmParser::parseSP3NegModifier() {
3423
3424 AsmToken NextToken[2];
3425 peekTokens(NextToken);
3426
3427 if (isToken(AsmToken::Minus) &&
3428 (isRegister(NextToken[0], NextToken[1]) ||
3429 NextToken[0].is(AsmToken::Pipe) ||
3430 isId(NextToken[0], "abs"))) {
3431 lex();
3432 return true;
3433 }
3434
3435 return false;
3436}
3437
3438ParseStatus
3439AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3440 bool AllowImm) {
3441 bool Neg, SP3Neg;
3442 bool Abs, SP3Abs;
3443 SMLoc Loc;
3444
3445 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3446 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3447 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3448
3449 SP3Neg = parseSP3NegModifier();
3450
3451 Loc = getLoc();
3452 Neg = trySkipId("neg");
3453 if (Neg && SP3Neg)
3454 return Error(Loc, "expected register or immediate");
3455 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3456 return ParseStatus::Failure;
3457
3458 Abs = trySkipId("abs");
3459 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3460 return ParseStatus::Failure;
3461
3462 LitModifier Lit = LitModifier::None;
3463 if (trySkipId("lit")) {
3464 Lit = LitModifier::Lit;
3465 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3466 return ParseStatus::Failure;
3467 } else if (trySkipId("lit64")) {
3468 Lit = LitModifier::Lit64;
3469 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3470 return ParseStatus::Failure;
3471 if (!has64BitLiterals())
3472 return Error(Loc, "lit64 is not supported on this GPU");
3473 }
3474
3475 Loc = getLoc();
3476 SP3Abs = trySkipToken(AsmToken::Pipe);
3477 if (Abs && SP3Abs)
3478 return Error(Loc, "expected register or immediate");
3479
3480 ParseStatus Res;
3481 if (AllowImm) {
3482 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3483 } else {
3484 Res = parseReg(Operands);
3485 }
3486 if (!Res.isSuccess())
3487 return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
3489 : Res;
3490
3491 if (Lit != LitModifier::None && !Operands.back()->isImm())
3492 Error(Loc, "expected immediate with lit modifier");
3493
3494 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3495 return ParseStatus::Failure;
3496 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3497 return ParseStatus::Failure;
3498 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3499 return ParseStatus::Failure;
3500 if (Lit != LitModifier::None &&
3501 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3502 return ParseStatus::Failure;
3503
3504 AMDGPUOperand::Modifiers Mods;
3505 Mods.Abs = Abs || SP3Abs;
3506 Mods.Neg = Neg || SP3Neg;
3507 Mods.Lit = Lit;
3508
3509 if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
3510 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3511 if (Op.isExpr())
3512 return Error(Op.getStartLoc(), "expected an absolute expression");
3513 Op.setModifiers(Mods);
3514 }
3515 return ParseStatus::Success;
3516}
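// Forms accepted by parseRegOrImmWithFPInputMods() include "abs(v0)",
// "neg(v0)", the SP3 spellings "|v0|" and "-v0", and combinations such as
// "-|v0|"; "--1" is rejected and "neg(-1)" must be used instead, as noted
// above.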
3517
3518ParseStatus
3519AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3520 bool AllowImm) {
3521 bool Sext = trySkipId("sext");
3522 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3523 return ParseStatus::Failure;
3524
3525 ParseStatus Res;
3526 if (AllowImm) {
3527 Res = parseRegOrImm(Operands);
3528 } else {
3529 Res = parseReg(Operands);
3530 }
3531 if (!Res.isSuccess())
3532 return Sext ? ParseStatus::Failure : Res;
3533
3534 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3535 return ParseStatus::Failure;
3536
3537 AMDGPUOperand::Modifiers Mods;
3538 Mods.Sext = Sext;
3539
3540 if (Mods.hasIntModifiers()) {
3541 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3542 if (Op.isExpr())
3543 return Error(Op.getStartLoc(), "expected an absolute expression");
3544 Op.setModifiers(Mods);
3545 }
3546
3547 return ParseStatus::Success;
3548}
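// Forms accepted by parseRegOrImmWithIntInputMods() include "sext(v0)" and,
// when immediates are allowed, "sext(0x1234)"; the modifier sets the SEXT
// source modifier bit on the operand.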
3549
3550ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3551 return parseRegOrImmWithFPInputMods(Operands, false);
3552}
3553
3554ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3555 return parseRegOrImmWithIntInputMods(Operands, false);
3556}
3557
3558ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3559 auto Loc = getLoc();
3560 if (trySkipId("off")) {
3561 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3562 AMDGPUOperand::ImmTyOff, false));
3563 return ParseStatus::Success;
3564 }
3565
3566 if (!isRegister())
3567 return ParseStatus::NoMatch;
3568
3569 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3570 if (Reg) {
3571 Operands.push_back(std::move(Reg));
3572 return ParseStatus::Success;
3573 }
3574
3575 return ParseStatus::Failure;
3576}
3577
3578unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3579 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3580
3581 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3582 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3583 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3584 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3585 return Match_InvalidOperand;
3586
3587 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3588 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3589 // v_mac_f32/16 allow only dst_sel == DWORD;
3590 auto OpNum =
3591 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3592 const auto &Op = Inst.getOperand(OpNum);
3593 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3594 return Match_InvalidOperand;
3595 }
3596 }
3597
3598 // Asm can first try to match VOPD or VOPD3. By failing early here with
3599 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3600 // Checking later during validateInstruction does not give a chance to retry
3601 // parsing as a different encoding.
3602 if (tryAnotherVOPDEncoding(Inst))
3603 return Match_InvalidOperand;
3604
3605 return Match_Success;
3606}
3607
3617
3618// What asm variants we should check
3619ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3620 if (isForcedDPP() && isForcedVOP3()) {
3621 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3622 return ArrayRef(Variants);
3623 }
3624 if (getForcedEncodingSize() == 32) {
3625 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3626 return ArrayRef(Variants);
3627 }
3628
3629 if (isForcedVOP3()) {
3630 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3631 return ArrayRef(Variants);
3632 }
3633
3634 if (isForcedSDWA()) {
3635 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3636 AMDGPUAsmVariants::SDWA9};
3637 return ArrayRef(Variants);
3638 }
3639
3640 if (isForcedDPP()) {
3641 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3642 return ArrayRef(Variants);
3643 }
3644
3645 return getAllVariants();
3646}
3647
3648StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3649 if (isForcedDPP() && isForcedVOP3())
3650 return "e64_dpp";
3651
3652 if (getForcedEncodingSize() == 32)
3653 return "e32";
3654
3655 if (isForcedVOP3())
3656 return "e64";
3657
3658 if (isForcedSDWA())
3659 return "sdwa";
3660
3661 if (isForcedDPP())
3662 return "dpp";
3663
3664 return "";
3665}
3666
3667unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3668 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3669 for (MCPhysReg Reg : Desc.implicit_uses()) {
3670 switch (Reg) {
3671 case AMDGPU::FLAT_SCR:
3672 case AMDGPU::VCC:
3673 case AMDGPU::VCC_LO:
3674 case AMDGPU::VCC_HI:
3675 case AMDGPU::M0:
3676 return Reg;
3677 default:
3678 break;
3679 }
3680 }
3681 return AMDGPU::NoRegister;
3682}
3683
3684// NB: This code is correct only when used to check constant
3685 // bus limitations because GFX7 supports no f16 inline constants.
3686// Note that there are no cases when a GFX7 opcode violates
3687// constant bus limitations due to the use of an f16 constant.
3688bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3689 unsigned OpIdx) const {
3690 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3691
3694 return false;
3695 }
3696
3697 const MCOperand &MO = Inst.getOperand(OpIdx);
3698
3699 int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
3700 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3701
3702 switch (OpSize) { // expected operand size
3703 case 8:
3704 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3705 case 4:
3706 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3707 case 2: {
3708 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3711 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3712
3716
3720
3724
3727 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3728
3731 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3732
3734 return false;
3735
3736 llvm_unreachable("invalid operand type");
3737 }
3738 default:
3739 llvm_unreachable("invalid operand size");
3740 }
3741}
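// As a rough illustration of what the checks above accept (not a normative
// list): for a 32-bit operand, inline constants cover small integers such as
// -16..64 and a handful of floats (0.5, 1.0, 2.0, 4.0 and their negations,
// plus 1/(2*pi) when hasInv2PiInlineImm() is true), so an operand like 0.5
// needs no literal slot, while a value such as 0.3 must be encoded as a
// 32-bit literal and therefore counts against the constant bus.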
3742
3743unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3744 if (!isGFX10Plus())
3745 return 1;
3746
3747 switch (Opcode) {
3748 // 64-bit shift instructions can use only one scalar value input
3749 case AMDGPU::V_LSHLREV_B64_e64:
3750 case AMDGPU::V_LSHLREV_B64_gfx10:
3751 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3752 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3753 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3754 case AMDGPU::V_LSHRREV_B64_e64:
3755 case AMDGPU::V_LSHRREV_B64_gfx10:
3756 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3757 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3758 case AMDGPU::V_ASHRREV_I64_e64:
3759 case AMDGPU::V_ASHRREV_I64_gfx10:
3760 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3761 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3762 case AMDGPU::V_LSHL_B64_e64:
3763 case AMDGPU::V_LSHR_B64_e64:
3764 case AMDGPU::V_ASHR_I64_e64:
3765 return 1;
3766 default:
3767 return 2;
3768 }
3769}
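// Sketch of the rule above: pre-GFX10, every VALU instruction may read at
// most one scalar value (SGPR or literal); on GFX10+ the general limit is
// two, except for the 64-bit shifts listed, which keep the single-read limit.
// For example, something like "v_lshlrev_b64 v[0:1], s4, s[6:7]" would be
// rejected for using two scalar sources even on GFX10+, while a VOP3 op such
// as "v_fma_f32 v0, s1, s2, v3" is accepted there.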
3770
3771constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3772using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3773
3774// Get regular operand indices in the same order as specified
3775// in the instruction (but append mandatory literals to the end).
3776static OperandIndices getSrcOperandIndices(unsigned Opcode,
3777 bool AddMandatoryLiterals = false) {
3778
3779 int16_t ImmIdx =
3780 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3781
3782 if (isVOPD(Opcode)) {
3783 int16_t ImmXIdx =
3784 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3785
3786 return {getNamedOperandIdx(Opcode, OpName::src0X),
3787 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3788 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3789 getNamedOperandIdx(Opcode, OpName::src0Y),
3790 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3791 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3792 ImmXIdx,
3793 ImmIdx};
3794 }
3795
3796 return {getNamedOperandIdx(Opcode, OpName::src0),
3797 getNamedOperandIdx(Opcode, OpName::src1),
3798 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3799}
3800
3801bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3802 const MCOperand &MO = Inst.getOperand(OpIdx);
3803 if (MO.isImm())
3804 return !isInlineConstant(Inst, OpIdx);
3805 if (MO.isReg()) {
3806 auto Reg = MO.getReg();
3807 if (!Reg)
3808 return false;
3809 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3810 auto PReg = mc2PseudoReg(Reg);
3811 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3812 }
3813 return true;
3814}
3815
3816// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3817// Writelane is special in that it can use SGPR and M0 (which would normally
3818// count as using the constant bus twice - but in this case it is allowed since
3819// the lane selector doesn't count as a use of the constant bus). However, it is
3820// still required to abide by the 1 SGPR rule.
3821static bool checkWriteLane(const MCInst &Inst) {
3822 const unsigned Opcode = Inst.getOpcode();
3823 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3824 return false;
3825 const MCOperand &LaneSelOp = Inst.getOperand(2);
3826 if (!LaneSelOp.isReg())
3827 return false;
3828 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3829 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3830}
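// Illustration (hypothetical operands): "v_writelane_b32 v0, s1, m0" names
// both an SGPR data source and M0, but only the data source is charged to
// the constant bus because the lane selector is exempt, so the instruction
// passes the single-SGPR rule; two distinct SGPR data values would not.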
3831
3832bool AMDGPUAsmParser::validateConstantBusLimitations(
3833 const MCInst &Inst, const OperandVector &Operands) {
3834 const unsigned Opcode = Inst.getOpcode();
3835 const MCInstrDesc &Desc = MII.get(Opcode);
3836 MCRegister LastSGPR;
3837 unsigned ConstantBusUseCount = 0;
3838 unsigned NumLiterals = 0;
3839 unsigned LiteralSize;
3840
3841 if (!(Desc.TSFlags &
3842 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3843 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3844 !isVOPD(Opcode))
3845 return true;
3846
3847 if (checkWriteLane(Inst))
3848 return true;
3849
3850 // Check special imm operands (used by madmk, etc)
3851 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3852 ++NumLiterals;
3853 LiteralSize = 4;
3854 }
3855
3856 SmallDenseSet<unsigned> SGPRsUsed;
3857 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3858 if (SGPRUsed != AMDGPU::NoRegister) {
3859 SGPRsUsed.insert(SGPRUsed);
3860 ++ConstantBusUseCount;
3861 }
3862
3863 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3864
3865 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3866
3867 for (int OpIdx : OpIndices) {
3868 if (OpIdx == -1)
3869 continue;
3870
3871 const MCOperand &MO = Inst.getOperand(OpIdx);
3872 if (usesConstantBus(Inst, OpIdx)) {
3873 if (MO.isReg()) {
3874 LastSGPR = mc2PseudoReg(MO.getReg());
3875 // Pairs of registers with partial intersections like these
3876 // s0, s[0:1]
3877 // flat_scratch_lo, flat_scratch
3878 // flat_scratch_lo, flat_scratch_hi
3879 // are theoretically valid but they are disabled anyway.
3880 // Note that this code mimics SIInstrInfo::verifyInstruction
3881 if (SGPRsUsed.insert(LastSGPR).second) {
3882 ++ConstantBusUseCount;
3883 }
3884 } else { // Expression or a literal
3885
3886 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3887 continue; // special operand like VINTERP attr_chan
3888
3889 // An instruction may use only one literal.
3890 // This has been validated on the previous step.
3891 // See validateVOPLiteral.
3892 // This literal may be used as more than one operand.
3893 // If all these operands are of the same size,
3894 // this literal counts as one scalar value.
3895 // Otherwise it counts as 2 scalar values.
3896 // See "GFX10 Shader Programming", section 3.6.2.3.
3897
3898 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3899 if (Size < 4)
3900 Size = 4;
3901
3902 if (NumLiterals == 0) {
3903 NumLiterals = 1;
3904 LiteralSize = Size;
3905 } else if (LiteralSize != Size) {
3906 NumLiterals = 2;
3907 }
3908 }
3909 }
3910
3911 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
3912 Error(getOperandLoc(Operands, OpIdx),
3913 "invalid operand (violates constant bus restrictions)");
3914 return false;
3915 }
3916 }
3917 return true;
3918}
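// Worked example of the literal-size rule referenced above (a sketch, not an
// exhaustive description): if the same literal, say 0x3f800000, feeds two
// 32-bit source operands it is charged to the constant bus once, but if one
// of the uses is a 64-bit operand the sizes differ and the literal is charged
// as two scalar values, which can then exceed the limit together with SGPRs.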
3919
3920std::optional<unsigned>
3921AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
3922
3923 const unsigned Opcode = Inst.getOpcode();
3924 if (!isVOPD(Opcode))
3925 return {};
3926
3927 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3928
3929 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3930 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3931 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3932 ? Opr.getReg()
3933 : MCRegister();
3934 };
3935
3936 // On GFX12+, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2
3937 // source-cache.
3938 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3939 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3940 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
3941 bool AllowSameVGPR = isGFX1250();
3942
3943 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3944 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3945 int I = getNamedOperandIdx(Opcode, OpName);
3946 const MCOperand &Op = Inst.getOperand(I);
3947 if (!Op.isImm())
3948 continue;
3949 int64_t Imm = Op.getImm();
3950 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
3951 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
3952 return (unsigned)I;
3953 }
3954
3955 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3956 OpName::vsrc2Y, OpName::imm}) {
3957 int I = getNamedOperandIdx(Opcode, OpName);
3958 if (I == -1)
3959 continue;
3960 const MCOperand &Op = Inst.getOperand(I);
3961 if (Op.isImm())
3962 return (unsigned)I;
3963 }
3964 }
3965
3966 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3967 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
3968 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
3969
3970 return InvalidCompOprIdx;
3971}
3972
3973bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
3974 const OperandVector &Operands) {
3975
3976 unsigned Opcode = Inst.getOpcode();
3977 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
3978
3979 if (AsVOPD3) {
3980 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
3981 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
3982 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
3983 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
3984 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
3985 }
3986 }
3987
3988 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
3989 if (!InvalidCompOprIdx.has_value())
3990 return true;
3991
3992 auto CompOprIdx = *InvalidCompOprIdx;
3993 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3994 auto ParsedIdx =
3995 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3996 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3997 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3998
3999 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4000 if (CompOprIdx == VOPD::Component::DST) {
4001 if (AsVOPD3)
4002 Error(Loc, "dst registers must be distinct");
4003 else
4004 Error(Loc, "one dst register must be even and the other odd");
4005 } else {
4006 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4007 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4008 " operands must use different VGPR banks");
4009 }
4010
4011 return false;
4012}
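// A minimal sketch of the diagnostics above (register numbers are
// hypothetical): for the base VOPD encoding the two destinations must land
// in opposite even/odd halves, so v0 and v1 are fine while v0 and v2 are
// rejected; VOPD3 relaxes this to merely requiring distinct destinations.
// Likewise, corresponding srcN operands of the X and Y halves must come from
// different VGPR banks, which is what the "different VGPR banks" error
// reports.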
4013
4014// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4015// potentially used as VOPD3 with the same operands.
4016bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4017 // First check if it fits VOPD
4018 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4019 if (!InvalidCompOprIdx.has_value())
4020 return false;
4021
4022 // Then if it fits VOPD3
4023 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4024 if (InvalidCompOprIdx.has_value()) {
4025 // If failed operand is dst it is better to show error about VOPD3
4026 // instruction as it has more capabilities and error message will be
4027 // more informative. If the dst is not legal for VOPD3, then it is not
4028 // legal for VOPD either.
4029 if (*InvalidCompOprIdx == VOPD::Component::DST)
4030 return true;
4031
4032 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4033 // with a conflict in tied implicit src2 of fmac and no asm operand to
4034 // point to.
4035 return false;
4036 }
4037 return true;
4038}
4039
4040// \returns true if a VOPD3 instruction can also be represented as a shorter
4041// VOPD encoding.
4042bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4043 const unsigned Opcode = Inst.getOpcode();
4044 const auto &II = getVOPDInstInfo(Opcode, &MII);
4045 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4046 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4047 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4048 return false;
4049
4050 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4051 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4052 // be parsed as VOPD which does not accept src2.
4053 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4054 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4055 return false;
4056
4057 // If any modifiers are set this cannot be VOPD.
4058 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4059 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4060 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4061 int I = getNamedOperandIdx(Opcode, OpName);
4062 if (I == -1)
4063 continue;
4064 if (Inst.getOperand(I).getImm())
4065 return false;
4066 }
4067
4068 return !tryVOPD3(Inst);
4069}
4070
4071// VOPD3 has more relaxed register constraints than VOPD. We prefer the shorter
4072// VOPD form but switch to VOPD3 otherwise.
4073bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4074 const unsigned Opcode = Inst.getOpcode();
4075 if (!isGFX1250() || !isVOPD(Opcode))
4076 return false;
4077
4078 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4079 return tryVOPD(Inst);
4080 return tryVOPD3(Inst);
4081}
4082
4083bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4084
4085 const unsigned Opc = Inst.getOpcode();
4086 const MCInstrDesc &Desc = MII.get(Opc);
4087
4088 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4089 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4090 assert(ClampIdx != -1);
4091 return Inst.getOperand(ClampIdx).getImm() == 0;
4092 }
4093
4094 return true;
4095}
4096
4097constexpr uint64_t MIMGFlags =
4098 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
4099
4100bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
4101
4102 const unsigned Opc = Inst.getOpcode();
4103 const MCInstrDesc &Desc = MII.get(Opc);
4104
4105 if ((Desc.TSFlags & MIMGFlags) == 0)
4106 return true;
4107
4108 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4109 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4110 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4111
4112 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4113 return true;
4114
4115 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4116 return true;
4117
4118 unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
4119 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4120 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4121 if (DMask == 0)
4122 DMask = 1;
4123
4124 bool IsPackedD16 = false;
4125 unsigned DataSize =
4126 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4127 if (hasPackedD16()) {
4128 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4129 IsPackedD16 = D16Idx >= 0;
4130 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4131 DataSize = (DataSize + 1) / 2;
4132 }
4133
4134 if ((VDataSize / 4) == DataSize + TFESize)
4135 return true;
4136
4137 StringRef Modifiers;
4138 if (isGFX90A())
4139 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4140 else
4141 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4142
4143 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4144 return false;
4145}
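// Worked example of the size check above (values are illustrative): with
// dmask = 0b0111 the data size is popcount(7) = 3 dwords, tfe adds one more,
// so vdata must be a 4-register tuple; with packed d16 the 3 components are
// instead packed into (3 + 1) / 2 = 2 dwords. Gather4 always produces 4
// components regardless of dmask.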
4146
4147bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
4148 const unsigned Opc = Inst.getOpcode();
4149 const MCInstrDesc &Desc = MII.get(Opc);
4150
4151 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4152 return true;
4153
4154 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4155
4156 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4157 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4158 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4159 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4160 ? AMDGPU::OpName::srsrc
4161 : AMDGPU::OpName::rsrc;
4162 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4163 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4164 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4165
4166 assert(VAddr0Idx != -1);
4167 assert(SrsrcIdx != -1);
4168 assert(SrsrcIdx > VAddr0Idx);
4169
4170 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4171 if (BaseOpcode->BVH) {
4172 if (IsA16 == BaseOpcode->A16)
4173 return true;
4174 Error(IDLoc, "image address size does not match a16");
4175 return false;
4176 }
4177
4178 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4179 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4180 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4181 unsigned ActualAddrSize =
4182 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;
4183
4184 unsigned ExpectedAddrSize =
4185 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4186
4187 if (IsNSA) {
4188 if (hasPartialNSAEncoding() &&
4189 ExpectedAddrSize >
4190 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
4191 int VAddrLastIdx = SrsrcIdx - 1;
4192 unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;
4193
4194 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4195 }
4196 } else {
4197 if (ExpectedAddrSize > 12)
4198 ExpectedAddrSize = 16;
4199
4200 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4201 // This provides backward compatibility for assembly created
4202 // before 160b/192b/224b types were directly supported.
4203 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4204 return true;
4205 }
4206
4207 if (ActualAddrSize == ExpectedAddrSize)
4208 return true;
4209
4210 Error(IDLoc, "image address size does not match dim and a16");
4211 return false;
4212}
4213
4214bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4215
4216 const unsigned Opc = Inst.getOpcode();
4217 const MCInstrDesc &Desc = MII.get(Opc);
4218
4219 if ((Desc.TSFlags & MIMGFlags) == 0)
4220 return true;
4221 if (!Desc.mayLoad() || !Desc.mayStore())
4222 return true; // Not atomic
4223
4224 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4225 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4226
4227 // This is an incomplete check because image_atomic_cmpswap
4228 // may only use 0x3 and 0xf while other atomic operations
4229 // may use 0x1 and 0x3. However these limitations are
4230 // verified when we check that dmask matches dst size.
4231 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4232}
4233
4234bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4235
4236 const unsigned Opc = Inst.getOpcode();
4237 const MCInstrDesc &Desc = MII.get(Opc);
4238
4239 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4240 return true;
4241
4242 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4243 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4244
4245 // GATHER4 instructions use dmask in a different fashion compared to
4246 // other MIMG instructions. The only useful DMASK values are
4247 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4248 // (red,red,red,red) etc.) The ISA document doesn't mention
4249 // this.
4250 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4251}
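// Illustration: "image_gather4 ... dmask:0x2" selects the green channel and
// is accepted, while dmask:0x3 (two bits set) is rejected by the check above;
// gather4 returns four samples of the single selected channel rather than
// multiple channels.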
4252
4253bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4254 const OperandVector &Operands) {
4255 if (!isGFX10Plus())
4256 return true;
4257
4258 const unsigned Opc = Inst.getOpcode();
4259 const MCInstrDesc &Desc = MII.get(Opc);
4260
4261 if ((Desc.TSFlags & MIMGFlags) == 0)
4262 return true;
4263
4264 // image_bvh_intersect_ray instructions do not have dim
4266 return true;
4267
4268 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4269 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4270 if (Op.isDim())
4271 return true;
4272 }
4273 return false;
4274}
4275
4276bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4277 const unsigned Opc = Inst.getOpcode();
4278 const MCInstrDesc &Desc = MII.get(Opc);
4279
4280 if ((Desc.TSFlags & MIMGFlags) == 0)
4281 return true;
4282
4283 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4284 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4285 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4286
4287 if (!BaseOpcode->MSAA)
4288 return true;
4289
4290 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4291 assert(DimIdx != -1);
4292
4293 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4294 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4295
4296 return DimInfo->MSAA;
4297}
4298
4299static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4300{
4301 switch (Opcode) {
4302 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4303 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4304 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4305 return true;
4306 default:
4307 return false;
4308 }
4309}
4310
4311// movrels* opcodes should only allow VGPRs as src0.
4312// This is specified in .td description for vop1/vop3,
4313// but sdwa is handled differently. See isSDWAOperand.
4314bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4315 const OperandVector &Operands) {
4316
4317 const unsigned Opc = Inst.getOpcode();
4318 const MCInstrDesc &Desc = MII.get(Opc);
4319
4320 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4321 return true;
4322
4323 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4324 assert(Src0Idx != -1);
4325
4326 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4327 if (Src0.isReg()) {
4328 auto Reg = mc2PseudoReg(Src0.getReg());
4329 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4330 if (!isSGPR(Reg, TRI))
4331 return true;
4332 }
4333
4334 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4335 return false;
4336}
4337
4338bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4339 const OperandVector &Operands) {
4340
4341 const unsigned Opc = Inst.getOpcode();
4342
4343 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4344 return true;
4345
4346 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4347 assert(Src0Idx != -1);
4348
4349 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4350 if (!Src0.isReg())
4351 return true;
4352
4353 auto Reg = mc2PseudoReg(Src0.getReg());
4354 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4355 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4356 Error(getOperandLoc(Operands, Src0Idx),
4357 "source operand must be either a VGPR or an inline constant");
4358 return false;
4359 }
4360
4361 return true;
4362}
4363
4364bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4365 const OperandVector &Operands) {
4366 unsigned Opcode = Inst.getOpcode();
4367 const MCInstrDesc &Desc = MII.get(Opcode);
4368
4369 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4370 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4371 return true;
4372
4373 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4374 if (Src2Idx == -1)
4375 return true;
4376
4377 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4378 Error(getOperandLoc(Operands, Src2Idx),
4379 "inline constants are not allowed for this operand");
4380 return false;
4381 }
4382
4383 return true;
4384}
4385
4386bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4387 const OperandVector &Operands) {
4388 const unsigned Opc = Inst.getOpcode();
4389 const MCInstrDesc &Desc = MII.get(Opc);
4390
4391 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4392 return true;
4393
4394 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4395 if (BlgpIdx != -1) {
4396 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4397 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4398
4399 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4400 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4401
4402 // Validate the correct register size was used for the floating point
4403 // format operands
4404
4405 bool Success = true;
4406 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4407 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4408 Error(getOperandLoc(Operands, Src0Idx),
4409 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4410 Success = false;
4411 }
4412
4413 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4414 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4415 Error(getOperandLoc(Operands, Src1Idx),
4416 "wrong register tuple size for blgp value " + Twine(BLGP));
4417 Success = false;
4418 }
4419
4420 return Success;
4421 }
4422 }
4423
4424 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4425 if (Src2Idx == -1)
4426 return true;
4427
4428 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4429 if (!Src2.isReg())
4430 return true;
4431
4432 MCRegister Src2Reg = Src2.getReg();
4433 MCRegister DstReg = Inst.getOperand(0).getReg();
4434 if (Src2Reg == DstReg)
4435 return true;
4436
4437 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4438 if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
4439 .getSizeInBits() <= 128)
4440 return true;
4441
4442 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4443 Error(getOperandLoc(Operands, Src2Idx),
4444 "source 2 operand must not partially overlap with dst");
4445 return false;
4446 }
4447
4448 return true;
4449}
4450
4451bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4452 switch (Inst.getOpcode()) {
4453 default:
4454 return true;
4455 case V_DIV_SCALE_F32_gfx6_gfx7:
4456 case V_DIV_SCALE_F32_vi:
4457 case V_DIV_SCALE_F32_gfx10:
4458 case V_DIV_SCALE_F64_gfx6_gfx7:
4459 case V_DIV_SCALE_F64_vi:
4460 case V_DIV_SCALE_F64_gfx10:
4461 break;
4462 }
4463
4464 // TODO: Check that src0 = src1 or src2.
4465
4466 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4467 AMDGPU::OpName::src2_modifiers,
4468 AMDGPU::OpName::src2_modifiers}) {
4469 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4470 .getImm() &
4472 return false;
4473 }
4474 }
4475
4476 return true;
4477}
4478
4479bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4480
4481 const unsigned Opc = Inst.getOpcode();
4482 const MCInstrDesc &Desc = MII.get(Opc);
4483
4484 if ((Desc.TSFlags & MIMGFlags) == 0)
4485 return true;
4486
4487 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4488 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4489 if (isCI() || isSI())
4490 return false;
4491 }
4492
4493 return true;
4494}
4495
4496bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4497 const unsigned Opc = Inst.getOpcode();
4498 const MCInstrDesc &Desc = MII.get(Opc);
4499
4500 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4501 return true;
4502
4503 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4504
4505 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4506}
4507
4508static bool IsRevOpcode(const unsigned Opcode)
4509{
4510 switch (Opcode) {
4511 case AMDGPU::V_SUBREV_F32_e32:
4512 case AMDGPU::V_SUBREV_F32_e64:
4513 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4514 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4515 case AMDGPU::V_SUBREV_F32_e32_vi:
4516 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4517 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4518 case AMDGPU::V_SUBREV_F32_e64_vi:
4519
4520 case AMDGPU::V_SUBREV_CO_U32_e32:
4521 case AMDGPU::V_SUBREV_CO_U32_e64:
4522 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4523 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4524
4525 case AMDGPU::V_SUBBREV_U32_e32:
4526 case AMDGPU::V_SUBBREV_U32_e64:
4527 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4528 case AMDGPU::V_SUBBREV_U32_e32_vi:
4529 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4530 case AMDGPU::V_SUBBREV_U32_e64_vi:
4531
4532 case AMDGPU::V_SUBREV_U32_e32:
4533 case AMDGPU::V_SUBREV_U32_e64:
4534 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4535 case AMDGPU::V_SUBREV_U32_e32_vi:
4536 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4537 case AMDGPU::V_SUBREV_U32_e64_vi:
4538
4539 case AMDGPU::V_SUBREV_F16_e32:
4540 case AMDGPU::V_SUBREV_F16_e64:
4541 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4542 case AMDGPU::V_SUBREV_F16_e32_vi:
4543 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4544 case AMDGPU::V_SUBREV_F16_e64_vi:
4545
4546 case AMDGPU::V_SUBREV_U16_e32:
4547 case AMDGPU::V_SUBREV_U16_e64:
4548 case AMDGPU::V_SUBREV_U16_e32_vi:
4549 case AMDGPU::V_SUBREV_U16_e64_vi:
4550
4551 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4552 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4553 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4554
4555 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4556 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4557
4558 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4559 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4560
4561 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4562 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4563
4564 case AMDGPU::V_LSHRREV_B32_e32:
4565 case AMDGPU::V_LSHRREV_B32_e64:
4566 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4567 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4568 case AMDGPU::V_LSHRREV_B32_e32_vi:
4569 case AMDGPU::V_LSHRREV_B32_e64_vi:
4570 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4571 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4572
4573 case AMDGPU::V_ASHRREV_I32_e32:
4574 case AMDGPU::V_ASHRREV_I32_e64:
4575 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4576 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4577 case AMDGPU::V_ASHRREV_I32_e32_vi:
4578 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4579 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4580 case AMDGPU::V_ASHRREV_I32_e64_vi:
4581
4582 case AMDGPU::V_LSHLREV_B32_e32:
4583 case AMDGPU::V_LSHLREV_B32_e64:
4584 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4585 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4586 case AMDGPU::V_LSHLREV_B32_e32_vi:
4587 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4588 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4589 case AMDGPU::V_LSHLREV_B32_e64_vi:
4590
4591 case AMDGPU::V_LSHLREV_B16_e32:
4592 case AMDGPU::V_LSHLREV_B16_e64:
4593 case AMDGPU::V_LSHLREV_B16_e32_vi:
4594 case AMDGPU::V_LSHLREV_B16_e64_vi:
4595 case AMDGPU::V_LSHLREV_B16_gfx10:
4596
4597 case AMDGPU::V_LSHRREV_B16_e32:
4598 case AMDGPU::V_LSHRREV_B16_e64:
4599 case AMDGPU::V_LSHRREV_B16_e32_vi:
4600 case AMDGPU::V_LSHRREV_B16_e64_vi:
4601 case AMDGPU::V_LSHRREV_B16_gfx10:
4602
4603 case AMDGPU::V_ASHRREV_I16_e32:
4604 case AMDGPU::V_ASHRREV_I16_e64:
4605 case AMDGPU::V_ASHRREV_I16_e32_vi:
4606 case AMDGPU::V_ASHRREV_I16_e64_vi:
4607 case AMDGPU::V_ASHRREV_I16_gfx10:
4608
4609 case AMDGPU::V_LSHLREV_B64_e64:
4610 case AMDGPU::V_LSHLREV_B64_gfx10:
4611 case AMDGPU::V_LSHLREV_B64_vi:
4612
4613 case AMDGPU::V_LSHRREV_B64_e64:
4614 case AMDGPU::V_LSHRREV_B64_gfx10:
4615 case AMDGPU::V_LSHRREV_B64_vi:
4616
4617 case AMDGPU::V_ASHRREV_I64_e64:
4618 case AMDGPU::V_ASHRREV_I64_gfx10:
4619 case AMDGPU::V_ASHRREV_I64_vi:
4620
4621 case AMDGPU::V_PK_LSHLREV_B16:
4622 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4623 case AMDGPU::V_PK_LSHLREV_B16_vi:
4624
4625 case AMDGPU::V_PK_LSHRREV_B16:
4626 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4627 case AMDGPU::V_PK_LSHRREV_B16_vi:
4628 case AMDGPU::V_PK_ASHRREV_I16:
4629 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4630 case AMDGPU::V_PK_ASHRREV_I16_vi:
4631 return true;
4632 default:
4633 return false;
4634 }
4635}
4636
4637bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4638 const OperandVector &Operands) {
4639 using namespace SIInstrFlags;
4640 const unsigned Opcode = Inst.getOpcode();
4641 const MCInstrDesc &Desc = MII.get(Opcode);
4642
4643 // lds_direct register is defined so that it can be used
4644 // with 9-bit operands only. Ignore encodings which do not accept these.
4645 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4646 if ((Desc.TSFlags & Enc) == 0)
4647 return true;
4648
4649 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4650 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4651 if (SrcIdx == -1)
4652 break;
4653 const auto &Src = Inst.getOperand(SrcIdx);
4654 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4655
4656 if (isGFX90A() || isGFX11Plus()) {
4657 Error(getOperandLoc(Operands, SrcIdx),
4658 "lds_direct is not supported on this GPU");
4659 return false;
4660 }
4661
4662 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4663 Error(getOperandLoc(Operands, SrcIdx),
4664 "lds_direct cannot be used with this instruction");
4665 return false;
4666 }
4667
4668 if (SrcName != OpName::src0) {
4669 Error(getOperandLoc(Operands, SrcIdx),
4670 "lds_direct may be used as src0 only");
4671 return false;
4672 }
4673 }
4674 }
4675
4676 return true;
4677}
4678
4679SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4680 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4681 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4682 if (Op.isFlatOffset())
4683 return Op.getStartLoc();
4684 }
4685 return getLoc();
4686}
4687
4688bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4689 const OperandVector &Operands) {
4690 auto Opcode = Inst.getOpcode();
4691 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4692 if (OpNum == -1)
4693 return true;
4694
4695 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4696 if ((TSFlags & SIInstrFlags::FLAT))
4697 return validateFlatOffset(Inst, Operands);
4698
4699 if ((TSFlags & SIInstrFlags::SMRD))
4700 return validateSMEMOffset(Inst, Operands);
4701
4702 const auto &Op = Inst.getOperand(OpNum);
4703 // GFX12+ buffer ops: InstOffset is signed 24-bit, but must not be negative.
4704 if (isGFX12Plus() &&
4705 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4706 const unsigned OffsetSize = 24;
4707 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4708 Error(getFlatOffsetLoc(Operands),
4709 Twine("expected a ") + Twine(OffsetSize - 1) +
4710 "-bit unsigned offset for buffer ops");
4711 return false;
4712 }
4713 } else {
4714 const unsigned OffsetSize = 16;
4715 if (!isUIntN(OffsetSize, Op.getImm())) {
4716 Error(getFlatOffsetLoc(Operands),
4717 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4718 return false;
4719 }
4720 }
4721 return true;
4722}
4723
4724bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4725 const OperandVector &Operands) {
4726 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4727 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4728 return true;
4729
4730 auto Opcode = Inst.getOpcode();
4731 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4732 assert(OpNum != -1);
4733
4734 const auto &Op = Inst.getOperand(OpNum);
4735 if (!hasFlatOffsets() && Op.getImm() != 0) {
4736 Error(getFlatOffsetLoc(Operands),
4737 "flat offset modifier is not supported on this GPU");
4738 return false;
4739 }
4740
4741 // For pre-GFX12 FLAT instructions the offset must be positive;
4742 // MSB is ignored and forced to zero.
4743 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4744 bool AllowNegative =
4745 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4746 isGFX12Plus();
4747 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4748 Error(getFlatOffsetLoc(Operands),
4749 Twine("expected a ") +
4750 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4751 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4752 return false;
4753 }
4754
4755 return true;
4756}
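// Sketch of the offset rules enforced above (assuming a target where global
// offsets are signed): a plain FLAT access such as "flat_load_dword v0,
// v[2:3] offset:-8" is rejected on targets that require a non-negative
// offset, whereas the same negative offset on a global/scratch form (or on
// GFX12+) only has to fit the signed bit-width reported by
// getNumFlatOffsetBits().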
4757
4758SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4759 // Start with second operand because SMEM Offset cannot be dst or src0.
4760 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4761 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4762 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4763 return Op.getStartLoc();
4764 }
4765 return getLoc();
4766}
4767
4768bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4769 const OperandVector &Operands) {
4770 if (isCI() || isSI())
4771 return true;
4772
4773 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4774 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4775 return true;
4776
4777 auto Opcode = Inst.getOpcode();
4778 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4779 if (OpNum == -1)
4780 return true;
4781
4782 const auto &Op = Inst.getOperand(OpNum);
4783 if (!Op.isImm())
4784 return true;
4785
4786 uint64_t Offset = Op.getImm();
4787 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4790 return true;
4791
4792 Error(getSMEMOffsetLoc(Operands),
4793 isGFX12Plus() && IsBuffer
4794 ? "expected a 23-bit unsigned offset for buffer ops"
4795 : isGFX12Plus() ? "expected a 24-bit signed offset"
4796 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4797 : "expected a 21-bit signed offset");
4798
4799 return false;
4800}
4801
4802bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4803 const OperandVector &Operands) {
4804 unsigned Opcode = Inst.getOpcode();
4805 const MCInstrDesc &Desc = MII.get(Opcode);
4806 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4807 return true;
4808
4809 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4810 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4811
4812 const int OpIndices[] = { Src0Idx, Src1Idx };
4813
4814 unsigned NumExprs = 0;
4815 unsigned NumLiterals = 0;
4816 int64_t LiteralValue;
4817
4818 for (int OpIdx : OpIndices) {
4819 if (OpIdx == -1) break;
4820
4821 const MCOperand &MO = Inst.getOperand(OpIdx);
4822 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4824 std::optional<int64_t> Imm;
4825 if (MO.isImm()) {
4826 Imm = MO.getImm();
4827 } else if (MO.isExpr()) {
4828 if (isLitExpr(MO.getExpr()))
4829 Imm = getLitValue(MO.getExpr());
4830 } else {
4831 continue;
4832 }
4833
4834 if (!Imm.has_value()) {
4835 ++NumExprs;
4836 } else if (!isInlineConstant(Inst, OpIdx)) {
4837 auto OpType = static_cast<AMDGPU::OperandType>(
4838 Desc.operands()[OpIdx].OperandType);
4839 int64_t Value = encode32BitLiteral(*Imm, OpType);
4840 if (NumLiterals == 0 || LiteralValue != Value) {
4841 LiteralValue = Value;
4842 ++NumLiterals;
4843 }
4844 }
4845 }
4846 }
4847
4848 if (NumLiterals + NumExprs <= 1)
4849 return true;
4850
4851 Error(getOperandLoc(Operands, Src1Idx),
4852 "only one unique literal operand is allowed");
4853 return false;
4854}
4855
4856bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4857 const unsigned Opc = Inst.getOpcode();
4858 if (isPermlane16(Opc)) {
4859 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4860 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4861
4862 if (OpSel & ~3)
4863 return false;
4864 }
4865
4866 uint64_t TSFlags = MII.get(Opc).TSFlags;
4867
4868 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4869 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4870 if (OpSelIdx != -1) {
4871 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4872 return false;
4873 }
4874 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4875 if (OpSelHiIdx != -1) {
4876 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4877 return false;
4878 }
4879 }
4880
4881 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4882 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4883 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4884 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4885 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4886 if (OpSel & 3)
4887 return false;
4888 }
4889
4890 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4891 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4892 // the first SGPR and use it for both the low and high operations.
4893 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4894 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4895 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4896 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4897 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4898
4899 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4900 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4901 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4902 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4903
4904 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4905
4906 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
4907 unsigned Mask = 1U << Index;
4908 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
4909 };
4910
4911 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
4912 !VerifyOneSGPR(/*Index=*/0))
4913 return false;
4914 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
4915 !VerifyOneSGPR(/*Index=*/1))
4916 return false;
4917
4918 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4919 if (Src2Idx != -1) {
4920 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4921 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
4922 !VerifyOneSGPR(/*Index=*/2))
4923 return false;
4924 }
4925 }
4926
4927 return true;
4928}
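// Sketch of the packed-FP32 rule above (operands are hypothetical): on
// GFX12+ an SGPR source is read only once and reused for both halves, so for
// something like "v_pk_add_f32 v[0:1], s[2:3], v[4:5]" the op_sel and
// op_sel_hi bits that correspond to the SGPR source must both be 0; setting
// either bit for that source makes the check return false.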
4929
4930bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4931 if (!hasTrue16Insts())
4932 return true;
4933 const MCRegisterInfo *MRI = getMRI();
4934 const unsigned Opc = Inst.getOpcode();
4935 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4936 if (OpSelIdx == -1)
4937 return true;
4938 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
4939 // If the value is 0 we could have a default OpSel Operand, so conservatively
4940 // allow it.
4941 if (OpSelOpValue == 0)
4942 return true;
4943 unsigned OpCount = 0;
4944 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4945 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4946 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
4947 if (OpIdx == -1)
4948 continue;
4949 const MCOperand &Op = Inst.getOperand(OpIdx);
4950 if (Op.isReg() &&
4951 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
4952 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
4953 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4954 if (OpSelOpIsHi != VGPRSuffixIsHi)
4955 return false;
4956 }
4957 ++OpCount;
4958 }
4959
4960 return true;
4961}
4962
4963bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
4964 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4965
4966 const unsigned Opc = Inst.getOpcode();
4967 uint64_t TSFlags = MII.get(Opc).TSFlags;
4968
4969 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4970 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4971 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4972 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4973 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4974 !(TSFlags & SIInstrFlags::IsSWMMAC))
4975 return true;
4976
4977 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4978 if (NegIdx == -1)
4979 return true;
4980
4981 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4982
4983 // Instructions that have a neg_lo or neg_hi operand, where the neg modifier is
4984 // allowed on some src operands but not on others.
4985 // It is convenient that such instructions don't have src_modifiers operand
4986 // for src operands that don't allow neg because they also don't allow opsel.
4987
4988 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4989 AMDGPU::OpName::src1_modifiers,
4990 AMDGPU::OpName::src2_modifiers};
4991
4992 for (unsigned i = 0; i < 3; ++i) {
4993 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4994 if (Neg & (1 << i))
4995 return false;
4996 }
4997 }
4998
4999 return true;
5000}
5001
5002bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5003 const OperandVector &Operands) {
5004 const unsigned Opc = Inst.getOpcode();
5005 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5006 if (DppCtrlIdx >= 0) {
5007 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5008
5009 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5010 AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
5011 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5012 // only on GFX12.
5013 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5014 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5015 : "DP ALU dpp only supports row_newbcast");
5016 return false;
5017 }
5018 }
5019
5020 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5021 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5022
5023 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5024 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5025 if (Src1Idx >= 0) {
5026 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5027 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5028 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5029 Error(getOperandLoc(Operands, Src1Idx),
5030 "invalid operand for instruction");
5031 return false;
5032 }
5033 if (Src1.isImm()) {
5034 Error(getInstLoc(Operands),
5035 "src1 immediate operand invalid for instruction");
5036 return false;
5037 }
5038 }
5039 }
5040
5041 return true;
5042}
5043
5044// Check if VCC register matches wavefront size
5045bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5046 return (Reg == AMDGPU::VCC && isWave64()) ||
5047 (Reg == AMDGPU::VCC_LO && isWave32());
5048}
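// Illustration: in wave64 mode the carry/condition register is written as
// "vcc" (e.g. "v_add_co_u32 v0, vcc, v1, v2"), while wave32 code must spell
// it "vcc_lo"; the check above rejects the spelling that does not match the
// current wavefront size.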
5049
5050// One unique literal can be used. VOP3 literal is only allowed in GFX10+
5051bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5052 const OperandVector &Operands) {
5053 unsigned Opcode = Inst.getOpcode();
5054 const MCInstrDesc &Desc = MII.get(Opcode);
5055 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5056 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5057 !HasMandatoryLiteral && !isVOPD(Opcode))
5058 return true;
5059
5060 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5061
5062 std::optional<unsigned> LiteralOpIdx;
5063 std::optional<uint64_t> LiteralValue;
5064
5065 for (int OpIdx : OpIndices) {
5066 if (OpIdx == -1)
5067 continue;
5068
5069 const MCOperand &MO = Inst.getOperand(OpIdx);
5070 if (!MO.isImm() && !MO.isExpr())
5071 continue;
5072 if (!isSISrcOperand(Desc, OpIdx))
5073 continue;
5074
5075 std::optional<int64_t> Imm;
5076 if (MO.isImm())
5077 Imm = MO.getImm();
5078 else if (MO.isExpr() && isLitExpr(MO.getExpr()))
5079 Imm = getLitValue(MO.getExpr());
5080
5081 bool IsAnotherLiteral = false;
5082 if (!Imm.has_value()) {
5083 // Literal value not known, so we conservatively assume it's different.
5084 IsAnotherLiteral = true;
5085 } else if (!isInlineConstant(Inst, OpIdx)) {
5086 uint64_t Value = *Imm;
5087 bool IsForcedFP64 =
5088 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5090 HasMandatoryLiteral);
5091 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5092 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5093 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
5094
5095 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5096 !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
5097 Error(getOperandLoc(Operands, OpIdx),
5098 "invalid operand for instruction");
5099 return false;
5100 }
5101
5102 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5103 Value = Hi_32(Value);
5104
5105 IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
5106 LiteralValue = Value;
5107 }
5108
5109 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5110 !getFeatureBits()[FeatureVOP3Literal]) {
5111 Error(getOperandLoc(Operands, OpIdx),
5112 "literal operands are not supported");
5113 return false;
5114 }
5115
5116 if (LiteralOpIdx && IsAnotherLiteral) {
5117 Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
5118 getOperandLoc(Operands, *LiteralOpIdx)),
5119 "only one unique literal operand is allowed");
5120 return false;
5121 }
5122
5123 if (IsAnotherLiteral)
5124 LiteralOpIdx = OpIdx;
5125 }
5126
5127 return true;
5128}
5129
5130// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5131static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5132 const MCRegisterInfo *MRI) {
5133 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5134 if (OpIdx < 0)
5135 return -1;
5136
5137 const MCOperand &Op = Inst.getOperand(OpIdx);
5138 if (!Op.isReg())
5139 return -1;
5140
5141 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5142 auto Reg = Sub ? Sub : Op.getReg();
5143 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5144 return AGPR32.contains(Reg) ? 1 : 0;
5145}
5146
5147bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5148 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5149 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5150 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5151 SIInstrFlags::DS)) == 0)
5152 return true;
5153
5154 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5155 ? AMDGPU::OpName::data0
5156 : AMDGPU::OpName::vdata;
5157
5158 const MCRegisterInfo *MRI = getMRI();
5159 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5160 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5161
5162 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5163 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5164 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5165 return false;
5166 }
5167
5168 auto FB = getFeatureBits();
5169 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5170 if (DataAreg < 0 || DstAreg < 0)
5171 return true;
5172 return DstAreg == DataAreg;
5173 }
5174
5175 return DstAreg < 1 && DataAreg < 1;
5176}
5177
5178bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5179 auto FB = getFeatureBits();
5180 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5181 return true;
5182
5183 unsigned Opc = Inst.getOpcode();
5184 const MCRegisterInfo *MRI = getMRI();
5185 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950 that allows an
5186 // unaligned VGPR. All others allow only even-aligned VGPRs.
5187 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5188 return true;
5189
5190 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5191 switch (Opc) {
5192 default:
5193 break;
5194 case AMDGPU::DS_LOAD_TR6_B96:
5195 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5196 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250 that
5197 // allows an unaligned VGPR. All others allow only even-aligned VGPRs.
5198 return true;
5199 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5200 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5201 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250 that
5202 // allows an unaligned VGPR for vdst, but other operands still allow only
5203 // even-aligned VGPRs.
5204 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5205 if (VAddrIdx != -1) {
5206 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5207 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5208 if ((Sub - AMDGPU::VGPR0) & 1)
5209 return false;
5210 }
5211 return true;
5212 }
5213 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5214 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5215 return true;
5216 }
5217 }
5218
5219 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5220 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5221 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5222 const MCOperand &Op = Inst.getOperand(I);
5223 if (!Op.isReg())
5224 continue;
5225
5226 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5227 if (!Sub)
5228 continue;
5229
5230 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5231 return false;
5232 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5233 return false;
5234 }
5235
5236 return true;
5237}
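// Sketch of the alignment rule above (register numbers are illustrative): on
// targets with FeatureRequiresAlignedVGPRs, any multi-register VGPR or AGPR
// tuple must start at an even register, so v[2:3] is accepted while v[1:2] is
// rejected; the TR6_B96 loads handled explicitly above are the documented
// exceptions.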
5238
5239SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5240 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5241 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5242 if (Op.isBLGP())
5243 return Op.getStartLoc();
5244 }
5245 return SMLoc();
5246}
5247
5248bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5249 const OperandVector &Operands) {
5250 unsigned Opc = Inst.getOpcode();
5251 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5252 if (BlgpIdx == -1)
5253 return true;
5254 SMLoc BLGPLoc = getBLGPLoc(Operands);
5255 if (!BLGPLoc.isValid())
5256 return true;
5257 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5258 auto FB = getFeatureBits();
5259 bool UsesNeg = false;
5260 if (FB[AMDGPU::FeatureGFX940Insts]) {
5261 switch (Opc) {
5262 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5263 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5264 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5265 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5266 UsesNeg = true;
5267 }
5268 }
5269
5270 if (IsNeg == UsesNeg)
5271 return true;
5272
5273 Error(BLGPLoc,
5274 UsesNeg ? "invalid modifier: blgp is not supported"
5275 : "invalid modifier: neg is not supported");
5276
5277 return false;
5278}
5279
5280bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5281 const OperandVector &Operands) {
5282 if (!isGFX11Plus())
5283 return true;
5284
5285 unsigned Opc = Inst.getOpcode();
5286 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5287 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5288 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5289 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5290 return true;
5291
5292 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5293 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5294 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5295 if (Reg == AMDGPU::SGPR_NULL)
5296 return true;
5297
5298 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5299 return false;
5300}
5301
5302bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5303 const OperandVector &Operands) {
5304 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5305 if ((TSFlags & SIInstrFlags::DS) == 0)
5306 return true;
5307 if (TSFlags & SIInstrFlags::GWS)
5308 return validateGWS(Inst, Operands);
5309 // Only validate GDS for non-GWS instructions.
5310 if (hasGDS())
5311 return true;
5312 int GDSIdx =
5313 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5314 if (GDSIdx < 0)
5315 return true;
5316 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5317 if (GDS) {
5318 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5319 Error(S, "gds modifier is not supported on this GPU");
5320 return false;
5321 }
5322 return true;
5323}
5324
5325// gfx90a has an undocumented limitation:
5326// DS_GWS opcodes must use even aligned registers.
5327bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5328 const OperandVector &Operands) {
5329 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5330 return true;
5331
5332 int Opc = Inst.getOpcode();
5333 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5334 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5335 return true;
5336
5337 const MCRegisterInfo *MRI = getMRI();
5338 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5339 int Data0Pos =
5340 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5341 assert(Data0Pos != -1);
5342 auto Reg = Inst.getOperand(Data0Pos).getReg();
5343 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5344 if (RegIdx & 1) {
5345 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5346 return false;
5347 }
5348
5349 return true;
5350}
5351
5352bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5353 const OperandVector &Operands,
5354 SMLoc IDLoc) {
5355 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5356 AMDGPU::OpName::cpol);
5357 if (CPolPos == -1)
5358 return true;
5359
5360 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5361
5362 if (!isGFX1250()) {
5363 if (CPol & CPol::SCAL) {
5364 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5365 StringRef CStr(S.getPointer());
5366 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5367 Error(S, "scale_offset is not supported on this GPU");
5368 }
5369 if (CPol & CPol::NV) {
5370 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5371 StringRef CStr(S.getPointer());
5372 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5373 Error(S, "nv is not supported on this GPU");
5374 }
5375 }
5376
5377 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5378 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5379 StringRef CStr(S.getPointer());
5380 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5381 Error(S, "scale_offset is not supported for this instruction");
5382 }
5383
5384 if (isGFX12Plus())
5385 return validateTHAndScopeBits(Inst, Operands, CPol);
5386
5387 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5388 if (TSFlags & SIInstrFlags::SMRD) {
5389 if (CPol && (isSI() || isCI())) {
5390 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5391 Error(S, "cache policy is not supported for SMRD instructions");
5392 return false;
5393 }
5394 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5395 Error(IDLoc, "invalid cache policy for SMEM instruction");
5396 return false;
5397 }
5398 }
5399
5400 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5401 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5402 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5403 SIInstrFlags::FLAT;
5404 if (!(TSFlags & AllowSCCModifier)) {
5405 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5406 StringRef CStr(S.getPointer());
5407 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5408 Error(S,
5409 "scc modifier is not supported for this instruction on this GPU");
5410 return false;
5411 }
5412 }
5413
5414 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
5415 return true;
5416
5417 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5418 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5419 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5420 : "instruction must use glc");
5421 return false;
5422 }
5423 } else {
5424 if (CPol & CPol::GLC) {
5425 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5426 StringRef CStr(S.getPointer());
5427 S = SMLoc::getFromPointer(
5428 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5429 Error(S, isGFX940() ? "instruction must not use sc0"
5430 : "instruction must not use glc");
5431 return false;
5432 }
5433 }
5434
5435 return true;
5436}
5437
5438bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5439 const OperandVector &Operands,
5440 const unsigned CPol) {
5441 const unsigned TH = CPol & AMDGPU::CPol::TH;
5442 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5443
5444 const unsigned Opcode = Inst.getOpcode();
5445 const MCInstrDesc &TID = MII.get(Opcode);
5446
5447 auto PrintError = [&](StringRef Msg) {
5448 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5449 Error(S, Msg);
5450 return false;
5451 };
5452
5453 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5456 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5457
5458 if (TH == 0)
5459 return true;
5460
5461 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5462 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5463 (TH == AMDGPU::CPol::TH_NT_HT)))
5464 return PrintError("invalid th value for SMEM instruction");
5465
5466 if (TH == AMDGPU::CPol::TH_BYPASS) {
5467 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5468 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5469 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5470 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5471 return PrintError("scope and th combination is not valid");
5472 }
5473
5474 unsigned THType = AMDGPU::getTemporalHintType(TID);
5475 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5476 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5477 return PrintError("invalid th value for atomic instructions");
5478 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5479 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5480 return PrintError("invalid th value for store instructions");
5481 } else {
5482 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5483 return PrintError("invalid th value for load instructions");
5484 }
5485
5486 return true;
5487}
5488
5489bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5490 const OperandVector &Operands) {
5491 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5492 if (Desc.mayStore() &&
5493 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5494 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5495 if (Loc != getInstLoc(Operands)) {
5496 Error(Loc, "TFE modifier has no meaning for store instructions");
5497 return false;
5498 }
5499 }
5500
5501 return true;
5502}
5503
5504bool AMDGPUAsmParser::validateSetVgprMSB(const MCInst &Inst,
5505 const OperandVector &Operands) {
5506 if (Inst.getOpcode() != AMDGPU::S_SET_VGPR_MSB_gfx12)
5507 return true;
5508
5509 int Simm16Pos =
5510 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::simm16);
5511 if ((unsigned)Inst.getOperand(Simm16Pos).getImm() > 255) {
5512 SMLoc Loc = Operands[1]->getStartLoc();
5513 Error(Loc, "s_set_vgpr_msb accepts values in range [0..255]");
5514 return false;
5515 }
5516
5517 return true;
5518}
5519
5520bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5521 const OperandVector &Operands) {
5522 unsigned Opc = Inst.getOpcode();
5523 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5524 const MCInstrDesc &Desc = MII.get(Opc);
5525
5526 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
5527 int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
5528 if (FmtIdx == -1)
5529 return true;
5530 unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
5531 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5532 unsigned RegSize =
5533 TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
5534 .getSizeInBits();
5535
5537 return true;
5538
5539 static const char *FmtNames[] = {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
5540 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
5541 "MATRIX_FMT_FP4"};
5542
5543 Error(getOperandLoc(Operands, SrcIdx),
5544 "wrong register tuple size for " + Twine(FmtNames[Fmt]));
5545 return false;
5546 };
5547
5548 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5549 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5550}
5551
5552bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
5553 const OperandVector &Operands) {
5554 if (!validateLdsDirect(Inst, Operands))
5555 return false;
5556 if (!validateTrue16OpSel(Inst)) {
5557 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5558 "op_sel operand conflicts with 16-bit operand suffix");
5559 return false;
5560 }
5561 if (!validateSOPLiteral(Inst, Operands))
5562 return false;
5563 if (!validateVOPLiteral(Inst, Operands)) {
5564 return false;
5565 }
5566 if (!validateConstantBusLimitations(Inst, Operands)) {
5567 return false;
5568 }
5569 if (!validateVOPD(Inst, Operands)) {
5570 return false;
5571 }
5572 if (!validateIntClampSupported(Inst)) {
5573 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5574 "integer clamping is not supported on this GPU");
5575 return false;
5576 }
5577 if (!validateOpSel(Inst)) {
5578 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5579 "invalid op_sel operand");
5580 return false;
5581 }
5582 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5583 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5584 "invalid neg_lo operand");
5585 return false;
5586 }
5587 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5588 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5589 "invalid neg_hi operand");
5590 return false;
5591 }
5592 if (!validateDPP(Inst, Operands)) {
5593 return false;
5594 }
5595 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5596 if (!validateMIMGD16(Inst)) {
5597 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5598 "d16 modifier is not supported on this GPU");
5599 return false;
5600 }
5601 if (!validateMIMGDim(Inst, Operands)) {
5602 Error(IDLoc, "missing dim operand");
5603 return false;
5604 }
5605 if (!validateTensorR128(Inst)) {
5606 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5607 "instruction must set modifier r128=0");
5608 return false;
5609 }
5610 if (!validateMIMGMSAA(Inst)) {
5611 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5612 "invalid dim; must be MSAA type");
5613 return false;
5614 }
5615 if (!validateMIMGDataSize(Inst, IDLoc)) {
5616 return false;
5617 }
5618 if (!validateMIMGAddrSize(Inst, IDLoc))
5619 return false;
5620 if (!validateMIMGAtomicDMask(Inst)) {
5621 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5622 "invalid atomic image dmask");
5623 return false;
5624 }
5625 if (!validateMIMGGatherDMask(Inst)) {
5626 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5627 "invalid image_gather dmask: only one bit must be set");
5628 return false;
5629 }
5630 if (!validateMovrels(Inst, Operands)) {
5631 return false;
5632 }
5633 if (!validateOffset(Inst, Operands)) {
5634 return false;
5635 }
5636 if (!validateMAIAccWrite(Inst, Operands)) {
5637 return false;
5638 }
5639 if (!validateMAISrc2(Inst, Operands)) {
5640 return false;
5641 }
5642 if (!validateMFMA(Inst, Operands)) {
5643 return false;
5644 }
5645 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5646 return false;
5647 }
5648
5649 if (!validateAGPRLdSt(Inst)) {
5650 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5651 ? "invalid register class: data and dst should be all VGPR or AGPR"
5652 : "invalid register class: agpr loads and stores not supported on this GPU"
5653 );
5654 return false;
5655 }
5656 if (!validateVGPRAlign(Inst)) {
5657 Error(IDLoc,
5658 "invalid register class: vgpr tuples must be 64 bit aligned");
5659 return false;
5660 }
5661 if (!validateDS(Inst, Operands)) {
5662 return false;
5663 }
5664
5665 if (!validateBLGP(Inst, Operands)) {
5666 return false;
5667 }
5668
5669 if (!validateDivScale(Inst)) {
5670 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5671 return false;
5672 }
5673 if (!validateWaitCnt(Inst, Operands)) {
5674 return false;
5675 }
5676 if (!validateTFE(Inst, Operands)) {
5677 return false;
5678 }
5679 if (!validateSetVgprMSB(Inst, Operands)) {
5680 return false;
5681 }
5682 if (!validateWMMA(Inst, Operands)) {
5683 return false;
5684 }
5685
5686 return true;
5687}
5688
5689 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5690 const FeatureBitset &FBS,
5691 unsigned VariantID = 0);
5692
5693static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5694 const FeatureBitset &AvailableFeatures,
5695 unsigned VariantID);
5696
5697bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5698 const FeatureBitset &FBS) {
5699 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5700}
5701
5702bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5703 const FeatureBitset &FBS,
5704 ArrayRef<unsigned> Variants) {
5705 for (auto Variant : Variants) {
5706 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5707 return true;
5708 }
5709
5710 return false;
5711}
5712
5713bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5714 SMLoc IDLoc) {
5715 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5716
5717 // Check if requested instruction variant is supported.
5718 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5719 return false;
5720
5721 // This instruction is not supported.
5722 // Clear any other pending errors because they are no longer relevant.
5723 getParser().clearPendingErrors();
5724
5725 // Requested instruction variant is not supported.
5726 // Check if any other variants are supported.
5727 StringRef VariantName = getMatchedVariantName();
5728 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5729 return Error(IDLoc,
5730 Twine(VariantName,
5731 " variant of this instruction is not supported"));
5732 }
5733
5734 // Check if this instruction may be used with a different wavesize.
5735 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5736 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5737 // FIXME: Use getAvailableFeatures, and do not manually recompute
5738 FeatureBitset FeaturesWS32 = getFeatureBits();
5739 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5740 .flip(AMDGPU::FeatureWavefrontSize32);
5741 FeatureBitset AvailableFeaturesWS32 =
5742 ComputeAvailableFeatures(FeaturesWS32);
5743
5744 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5745 return Error(IDLoc, "instruction requires wavesize=32");
5746 }
5747
5748 // Finally check if this instruction is supported on any other GPU.
5749 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5750 return Error(IDLoc, "instruction not supported on this GPU");
5751 }
5752
5753 // Instruction not supported on any GPU. Probably a typo.
5754 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5755 return Error(IDLoc, "invalid instruction" + Suggestion);
5756}
5757
5758 static bool isInvalidVOPDY(const OperandVector &Operands,
5759 uint64_t InvalidOprIdx) {
5760 assert(InvalidOprIdx < Operands.size());
5761 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5762 if (Op.isToken() && InvalidOprIdx > 1) {
5763 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5764 return PrevOp.isToken() && PrevOp.getToken() == "::";
5765 }
5766 return false;
5767}
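// Editorial note (not part of the original source): VOPD instructions are
// written as two halves joined by a "::" token (e.g. a pair of v_dual_*
// mnemonics), so an invalid operand that directly follows "::" is reported
// above as belonging to the VOPDY half.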
5768
5769bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5770 OperandVector &Operands,
5771 MCStreamer &Out,
5772 uint64_t &ErrorInfo,
5773 bool MatchingInlineAsm) {
5774 MCInst Inst;
5775 Inst.setLoc(IDLoc);
5776 unsigned Result = Match_Success;
5777 for (auto Variant : getMatchedVariants()) {
5778 uint64_t EI;
5779 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5780 Variant);
5781 // We order match statuses from least to most specific and keep the most
5782 // specific status as the result:
5783 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5784 if (R == Match_Success || R == Match_MissingFeature ||
5785 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5786 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5787 Result != Match_MissingFeature)) {
5788 Result = R;
5789 ErrorInfo = EI;
5790 }
5791 if (R == Match_Success)
5792 break;
5793 }
5794
5795 if (Result == Match_Success) {
5796 if (!validateInstruction(Inst, IDLoc, Operands)) {
5797 return true;
5798 }
5799 Out.emitInstruction(Inst, getSTI());
5800 return false;
5801 }
5802
5803 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5804 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5805 return true;
5806 }
5807
5808 switch (Result) {
5809 default: break;
5810 case Match_MissingFeature:
5811 // It has been verified that the specified instruction
5812 // mnemonic is valid. A match was found but it requires
5813 // features which are not supported on this GPU.
5814 return Error(IDLoc, "operands are not valid for this GPU or mode");
5815
5816 case Match_InvalidOperand: {
5817 SMLoc ErrorLoc = IDLoc;
5818 if (ErrorInfo != ~0ULL) {
5819 if (ErrorInfo >= Operands.size()) {
5820 return Error(IDLoc, "too few operands for instruction");
5821 }
5822 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5823 if (ErrorLoc == SMLoc())
5824 ErrorLoc = IDLoc;
5825
5826 if (isInvalidVOPDY(Operands, ErrorInfo))
5827 return Error(ErrorLoc, "invalid VOPDY instruction");
5828 }
5829 return Error(ErrorLoc, "invalid operand for instruction");
5830 }
5831
5832 case Match_MnemonicFail:
5833 llvm_unreachable("Invalid instructions should have been handled already");
5834 }
5835 llvm_unreachable("Implement any new match types added!");
5836}
5837
5838bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5839 int64_t Tmp = -1;
5840 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5841 return true;
5842 }
5843 if (getParser().parseAbsoluteExpression(Tmp)) {
5844 return true;
5845 }
5846 Ret = static_cast<uint32_t>(Tmp);
5847 return false;
5848}
5849
5850bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5851 if (!getSTI().getTargetTriple().isAMDGCN())
5852 return TokError("directive only supported for amdgcn architecture");
5853
5854 std::string TargetIDDirective;
5855 SMLoc TargetStart = getTok().getLoc();
5856 if (getParser().parseEscapedString(TargetIDDirective))
5857 return true;
5858
5859 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5860 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5861 return getParser().Error(TargetRange.Start,
5862 (Twine(".amdgcn_target directive's target id ") +
5863 Twine(TargetIDDirective) +
5864 Twine(" does not match the specified target id ") +
5865 Twine(getTargetStreamer().getTargetID()->toString())).str());
5866
5867 return false;
5868}
5869
5870bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5871 return Error(Range.Start, "value out of range", Range);
5872}
5873
5874bool AMDGPUAsmParser::calculateGPRBlocks(
5875 const FeatureBitset &Features, const MCExpr *VCCUsed,
5876 const MCExpr *FlatScrUsed, bool XNACKUsed,
5877 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5878 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5879 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5880 // TODO(scott.linder): These calculations are duplicated from
5881 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5882 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5883 MCContext &Ctx = getContext();
5884
5885 const MCExpr *NumSGPRs = NextFreeSGPR;
5886 int64_t EvaluatedSGPRs;
5887
5888 if (Version.Major >= 10)
5889 NumSGPRs = MCConstantExpr::create(0, Ctx);
5890 else {
5891 unsigned MaxAddressableNumSGPRs =
5892 IsaInfo::getAddressableNumSGPRs(&getSTI());
5893
5894 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5895 !Features.test(FeatureSGPRInitBug) &&
5896 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5897 return OutOfRangeError(SGPRRange);
5898
5899 const MCExpr *ExtraSGPRs =
5900 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5901 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5902
5903 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5904 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5905 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5906 return OutOfRangeError(SGPRRange);
5907
5908 if (Features.test(FeatureSGPRInitBug))
5909 NumSGPRs =
5910 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5911 }
5912
5913 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5914 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
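// Editorial worked example (not in the original source): with NumGPR = 37 and
// an encoding granule of 8, alignTo(max(1, 37), 8) = 40, 40 / 8 = 5, and
// 5 - 1 = 4 is the encoded block count.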
5915 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5916 unsigned Granule) -> const MCExpr * {
5917 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5918 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5919 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5920 const MCExpr *AlignToGPR =
5921 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5922 const MCExpr *DivGPR =
5923 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5924 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5925 return SubGPR;
5926 };
5927
5928 VGPRBlocks = GetNumGPRBlocks(
5929 NextFreeVGPR,
5930 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5931 SGPRBlocks =
5932 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5933
5934 return false;
5935}
5936
5937bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5938 if (!getSTI().getTargetTriple().isAMDGCN())
5939 return TokError("directive only supported for amdgcn architecture");
5940
5941 if (!isHsaAbi(getSTI()))
5942 return TokError("directive only supported for amdhsa OS");
5943
5944 StringRef KernelName;
5945 if (getParser().parseIdentifier(KernelName))
5946 return true;
5947
5948 AMDGPU::MCKernelDescriptor KD =
5949 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5950 &getSTI(), getContext());
5951
5952 StringSet<> Seen;
5953
5954 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5955
5956 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5957 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5958
5959 SMRange VGPRRange;
5960 const MCExpr *NextFreeVGPR = ZeroExpr;
5961 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5962 const MCExpr *NamedBarCnt = ZeroExpr;
5963 uint64_t SharedVGPRCount = 0;
5964 uint64_t PreloadLength = 0;
5965 uint64_t PreloadOffset = 0;
5966 SMRange SGPRRange;
5967 const MCExpr *NextFreeSGPR = ZeroExpr;
5968
5969 // Count the number of user SGPRs implied from the enabled feature bits.
5970 unsigned ImpliedUserSGPRCount = 0;
5971
5972 // Track if the asm explicitly contains the directive for the user SGPR
5973 // count.
5974 std::optional<unsigned> ExplicitUserSGPRCount;
5975 const MCExpr *ReserveVCC = OneExpr;
5976 const MCExpr *ReserveFlatScr = OneExpr;
5977 std::optional<bool> EnableWavefrontSize32;
5978
5979 while (true) {
5980 while (trySkipToken(AsmToken::EndOfStatement));
5981
5982 StringRef ID;
5983 SMRange IDRange = getTok().getLocRange();
5984 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5985 return true;
5986
5987 if (ID == ".end_amdhsa_kernel")
5988 break;
5989
5990 if (!Seen.insert(ID).second)
5991 return TokError(".amdhsa_ directives cannot be repeated");
5992
5993 SMLoc ValStart = getLoc();
5994 const MCExpr *ExprVal;
5995 if (getParser().parseExpression(ExprVal))
5996 return true;
5997 SMLoc ValEnd = getLoc();
5998 SMRange ValRange = SMRange(ValStart, ValEnd);
5999
6000 int64_t IVal = 0;
6001 uint64_t Val = IVal;
6002 bool EvaluatableExpr;
6003 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6004 if (IVal < 0)
6005 return OutOfRangeError(ValRange);
6006 Val = IVal;
6007 }
6008
6009#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6010 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6011 return OutOfRangeError(RANGE); \
6012 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6013 getContext());
6014
6015// Some fields use the parsed value immediately, which requires the expression
6016// to be resolvable.
6017#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6018 if (!(RESOLVED)) \
6019 return Error(IDRange.Start, "directive should have resolvable expression", \
6020 IDRange);
6021
6022 if (ID == ".amdhsa_group_segment_fixed_size") {
6023 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
6024 CHAR_BIT>(Val))
6025 return OutOfRangeError(ValRange);
6026 KD.group_segment_fixed_size = ExprVal;
6027 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6028 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
6029 CHAR_BIT>(Val))
6030 return OutOfRangeError(ValRange);
6031 KD.private_segment_fixed_size = ExprVal;
6032 } else if (ID == ".amdhsa_kernarg_size") {
6033 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6034 return OutOfRangeError(ValRange);
6035 KD.kernarg_size = ExprVal;
6036 } else if (ID == ".amdhsa_user_sgpr_count") {
6037 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6038 ExplicitUserSGPRCount = Val;
6039 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6040 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6041 if (hasArchitectedFlatScratch())
6042 return Error(IDRange.Start,
6043 "directive is not supported with architected flat scratch",
6044 IDRange);
6045 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6046 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6047 ExprVal, ValRange);
6048 if (Val)
6049 ImpliedUserSGPRCount += 4;
6050 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6051 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6052 if (!hasKernargPreload())
6053 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6054
6055 if (Val > getMaxNumUserSGPRs())
6056 return OutOfRangeError(ValRange);
6057 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6058 ValRange);
6059 if (Val) {
6060 ImpliedUserSGPRCount += Val;
6061 PreloadLength = Val;
6062 }
6063 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6064 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6065 if (!hasKernargPreload())
6066 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6067
6068 if (Val >= 1024)
6069 return OutOfRangeError(ValRange);
6070 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6071 ValRange);
6072 if (Val)
6073 PreloadOffset = Val;
6074 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6075 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6076 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6077 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6078 ValRange);
6079 if (Val)
6080 ImpliedUserSGPRCount += 2;
6081 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6082 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6083 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6084 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6085 ValRange);
6086 if (Val)
6087 ImpliedUserSGPRCount += 2;
6088 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6089 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6090 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6091 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6092 ExprVal, ValRange);
6093 if (Val)
6094 ImpliedUserSGPRCount += 2;
6095 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6096 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6097 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6098 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6099 ValRange);
6100 if (Val)
6101 ImpliedUserSGPRCount += 2;
6102 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6103 if (hasArchitectedFlatScratch())
6104 return Error(IDRange.Start,
6105 "directive is not supported with architected flat scratch",
6106 IDRange);
6107 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6108 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6109 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6110 ExprVal, ValRange);
6111 if (Val)
6112 ImpliedUserSGPRCount += 2;
6113 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6114 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6115 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6116 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6117 ExprVal, ValRange);
6118 if (Val)
6119 ImpliedUserSGPRCount += 1;
6120 } else if (ID == ".amdhsa_wavefront_size32") {
6121 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6122 if (IVersion.Major < 10)
6123 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6124 EnableWavefrontSize32 = Val;
6125 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6126 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6127 ValRange);
6128 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6129 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6130 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6131 ValRange);
6132 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6133 if (hasArchitectedFlatScratch())
6134 return Error(IDRange.Start,
6135 "directive is not supported with architected flat scratch",
6136 IDRange);
6137 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6138 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6139 ValRange);
6140 } else if (ID == ".amdhsa_enable_private_segment") {
6141 if (!hasArchitectedFlatScratch())
6142 return Error(
6143 IDRange.Start,
6144 "directive is not supported without architected flat scratch",
6145 IDRange);
6146 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6147 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6148 ValRange);
6149 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6150 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6151 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6152 ValRange);
6153 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6154 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6155 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6156 ValRange);
6157 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6158 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6159 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6160 ValRange);
6161 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6162 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6163 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6164 ValRange);
6165 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6166 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6167 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6168 ValRange);
6169 } else if (ID == ".amdhsa_next_free_vgpr") {
6170 VGPRRange = ValRange;
6171 NextFreeVGPR = ExprVal;
6172 } else if (ID == ".amdhsa_next_free_sgpr") {
6173 SGPRRange = ValRange;
6174 NextFreeSGPR = ExprVal;
6175 } else if (ID == ".amdhsa_accum_offset") {
6176 if (!isGFX90A())
6177 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6178 AccumOffset = ExprVal;
6179 } else if (ID == ".amdhsa_named_barrier_count") {
6180 if (!isGFX1250())
6181 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6182 NamedBarCnt = ExprVal;
6183 } else if (ID == ".amdhsa_reserve_vcc") {
6184 if (EvaluatableExpr && !isUInt<1>(Val))
6185 return OutOfRangeError(ValRange);
6186 ReserveVCC = ExprVal;
6187 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6188 if (IVersion.Major < 7)
6189 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6190 if (hasArchitectedFlatScratch())
6191 return Error(IDRange.Start,
6192 "directive is not supported with architected flat scratch",
6193 IDRange);
6194 if (EvaluatableExpr && !isUInt<1>(Val))
6195 return OutOfRangeError(ValRange);
6196 ReserveFlatScr = ExprVal;
6197 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6198 if (IVersion.Major < 8)
6199 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6200 if (!isUInt<1>(Val))
6201 return OutOfRangeError(ValRange);
6202 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6203 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6204 IDRange);
6205 } else if (ID == ".amdhsa_float_round_mode_32") {
6206 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6207 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6208 ValRange);
6209 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6210 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6211 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6212 ValRange);
6213 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6214 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6215 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6216 ValRange);
6217 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6218 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6219 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6220 ValRange);
6221 } else if (ID == ".amdhsa_dx10_clamp") {
6222 if (IVersion.Major >= 12)
6223 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6224 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6225 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6226 ValRange);
6227 } else if (ID == ".amdhsa_ieee_mode") {
6228 if (IVersion.Major >= 12)
6229 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6230 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6231 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6232 ValRange);
6233 } else if (ID == ".amdhsa_fp16_overflow") {
6234 if (IVersion.Major < 9)
6235 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6236 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6237 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6238 ValRange);
6239 } else if (ID == ".amdhsa_tg_split") {
6240 if (!isGFX90A())
6241 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6242 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6243 ExprVal, ValRange);
6244 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6245 if (!supportsWGP(getSTI()))
6246 return Error(IDRange.Start,
6247 "directive unsupported on " + getSTI().getCPU(), IDRange);
6248 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6249 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6250 ValRange);
6251 } else if (ID == ".amdhsa_memory_ordered") {
6252 if (IVersion.Major < 10)
6253 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6254 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6255 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6256 ValRange);
6257 } else if (ID == ".amdhsa_forward_progress") {
6258 if (IVersion.Major < 10)
6259 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6260 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6261 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6262 ValRange);
6263 } else if (ID == ".amdhsa_shared_vgpr_count") {
6264 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6265 if (IVersion.Major < 10 || IVersion.Major >= 12)
6266 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6267 IDRange);
6268 SharedVGPRCount = Val;
6269 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6270 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6271 ValRange);
6272 } else if (ID == ".amdhsa_inst_pref_size") {
6273 if (IVersion.Major < 11)
6274 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6275 if (IVersion.Major == 11) {
6276 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6277 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6278 ValRange);
6279 } else {
6280 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6281 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6282 ValRange);
6283 }
6284 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6285 PARSE_BITS_ENTRY(
6286 KD.compute_pgm_rsrc2,
6287 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6288 ExprVal, ValRange);
6289 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6290 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6291 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6292 ExprVal, ValRange);
6293 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6294 PARSE_BITS_ENTRY(
6295 KD.compute_pgm_rsrc2,
6296 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6297 ExprVal, ValRange);
6298 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6299 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6300 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6301 ExprVal, ValRange);
6302 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6303 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6304 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6305 ExprVal, ValRange);
6306 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6307 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6308 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6309 ExprVal, ValRange);
6310 } else if (ID == ".amdhsa_exception_int_div_zero") {
6311 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6312 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6313 ExprVal, ValRange);
6314 } else if (ID == ".amdhsa_round_robin_scheduling") {
6315 if (IVersion.Major < 12)
6316 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6317 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6318 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6319 ValRange);
6320 } else {
6321 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6322 }
6323
6324#undef PARSE_BITS_ENTRY
6325 }
6326
6327 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6328 return TokError(".amdhsa_next_free_vgpr directive is required");
6329
6330 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6331 return TokError(".amdhsa_next_free_sgpr directive is required");
6332
6333 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6334
6335 // Consider the case where the total number of user SGPRs, including trailing
6336 // allocated preload SGPRs, is greater than the number of explicitly
6337 // referenced SGPRs.
6338 if (PreloadLength) {
6339 MCContext &Ctx = getContext();
6340 NextFreeSGPR = AMDGPUMCExpr::createMax(
6341 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6342 }
6343
6344 const MCExpr *VGPRBlocks;
6345 const MCExpr *SGPRBlocks;
6346 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6347 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6348 EnableWavefrontSize32, NextFreeVGPR,
6349 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6350 SGPRBlocks))
6351 return true;
6352
6353 int64_t EvaluatedVGPRBlocks;
6354 bool VGPRBlocksEvaluatable =
6355 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6356 if (VGPRBlocksEvaluatable &&
6357 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6358 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6359 return OutOfRangeError(VGPRRange);
6360 }
6361 AMDGPU::MCKernelDescriptor::bits_set(
6362 KD.compute_pgm_rsrc1, VGPRBlocks,
6363 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6364 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6365
6366 int64_t EvaluatedSGPRBlocks;
6367 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6368 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6369 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6370 return OutOfRangeError(SGPRRange);
6371 AMDGPU::MCKernelDescriptor::bits_set(
6372 KD.compute_pgm_rsrc1, SGPRBlocks,
6373 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6374 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6375
6376 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6377 return TokError(".amdhsa_user_sgpr_count smaller than implied by "
6378 "enabled user SGPRs");
6379
6380 if (isGFX1250()) {
6381 if (!isUInt<COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
6382 return TokError("too many user SGPRs enabled");
6383 AMDGPU::MCKernelDescriptor::bits_set(
6384 KD.compute_pgm_rsrc2,
6385 MCConstantExpr::create(UserSGPRCount, getContext()),
6386 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6387 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6388 } else {
6389 if (!isUInt<COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_WIDTH>(
6390 UserSGPRCount))
6391 return TokError("too many user SGPRs enabled");
6392 AMDGPU::MCKernelDescriptor::bits_set(
6393 KD.compute_pgm_rsrc2,
6394 MCConstantExpr::create(UserSGPRCount, getContext()),
6395 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6396 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6397 }
6398
6399 int64_t IVal = 0;
6400 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6401 return TokError("Kernarg size should be resolvable");
6402 uint64_t kernarg_size = IVal;
6403 if (PreloadLength && kernarg_size &&
6404 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6405 return TokError("Kernarg preload length + offset is larger than the "
6406 "kernarg segment size");
6407
6408 if (isGFX90A()) {
6409 if (!Seen.contains(".amdhsa_accum_offset"))
6410 return TokError(".amdhsa_accum_offset directive is required");
6411 int64_t EvaluatedAccum;
6412 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6413 uint64_t UEvaluatedAccum = EvaluatedAccum;
6414 if (AccumEvaluatable &&
6415 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6416 return TokError("accum_offset should be in range [4..256] in "
6417 "increments of 4");
6418
6419 int64_t EvaluatedNumVGPR;
6420 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6421 AccumEvaluatable &&
6422 UEvaluatedAccum >
6423 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6424 return TokError("accum_offset exceeds total VGPR allocation");
6425 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6426 MCBinaryExpr::createDiv(
6427 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6428 MCConstantExpr::create(1, getContext()), getContext());
6429 AMDGPU::MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
6430 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6431 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6432 getContext());
6433 }
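// Editorial note (not in the original source): the encoded field is
// (accum_offset / 4) - 1, so e.g. ".amdhsa_accum_offset 64" is emitted as 15
// in COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET.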
6434
6435 if (isGFX1250())
6436 AMDGPU::MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, NamedBarCnt,
6437 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6438 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6439 getContext());
6440
6441 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6442 // SharedVGPRCount < 16 is checked by PARSE_BITS_ENTRY
6443 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6444 return TokError("shared_vgpr_count directive not valid on "
6445 "wavefront size 32");
6446 }
6447
6448 if (VGPRBlocksEvaluatable &&
6449 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6450 63)) {
6451 return TokError("shared_vgpr_count*2 + "
6452 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6453 "exceed 63\n");
6454 }
6455 }
6456
6457 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6458 NextFreeVGPR, NextFreeSGPR,
6459 ReserveVCC, ReserveFlatScr);
6460 return false;
6461}
6462
6463bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6464 uint32_t Version;
6465 if (ParseAsAbsoluteExpression(Version))
6466 return true;
6467
6468 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6469 return false;
6470}
6471
6472bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6473 AMDGPUMCKernelCodeT &C) {
6474 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6475 // assembly for backwards compatibility.
6476 if (ID == "max_scratch_backing_memory_byte_size") {
6477 Parser.eatToEndOfStatement();
6478 return false;
6479 }
6480
6481 SmallString<40> ErrStr;
6482 raw_svector_ostream Err(ErrStr);
6483 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6484 return TokError(Err.str());
6485 }
6486 Lex();
6487
6488 if (ID == "enable_wavefront_size32") {
6489 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6490 if (!isGFX10Plus())
6491 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6492 if (!isWave32())
6493 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6494 } else {
6495 if (!isWave64())
6496 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6497 }
6498 }
6499
6500 if (ID == "wavefront_size") {
6501 if (C.wavefront_size == 5) {
6502 if (!isGFX10Plus())
6503 return TokError("wavefront_size=5 is only allowed on GFX10+");
6504 if (!isWave32())
6505 return TokError("wavefront_size=5 requires +WavefrontSize32");
6506 } else if (C.wavefront_size == 6) {
6507 if (!isWave64())
6508 return TokError("wavefront_size=6 requires +WavefrontSize64");
6509 }
6510 }
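// Editorial note (not in the original source): wavefront_size is a log2
// value, which is why 5 corresponds to wave32 and 6 to wave64 in the checks
// above.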
6511
6512 return false;
6513}
6514
6515bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6516 AMDGPUMCKernelCodeT KernelCode;
6517 KernelCode.initDefault(&getSTI(), getContext());
6518
6519 while (true) {
6520 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6521 // will set the current token to EndOfStatement.
6522 while(trySkipToken(AsmToken::EndOfStatement));
6523
6524 StringRef ID;
6525 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6526 return true;
6527
6528 if (ID == ".end_amd_kernel_code_t")
6529 break;
6530
6531 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6532 return true;
6533 }
6534
6535 KernelCode.validate(&getSTI(), getContext());
6536 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6537
6538 return false;
6539}
6540
6541bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6542 StringRef KernelName;
6543 if (!parseId(KernelName, "expected symbol name"))
6544 return true;
6545
6546 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6547 ELF::STT_AMDGPU_HSA_KERNEL);
6548
6549 KernelScope.initialize(getContext());
6550 return false;
6551}
6552
6553bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6554 if (!getSTI().getTargetTriple().isAMDGCN()) {
6555 return Error(getLoc(),
6556 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6557 "architectures");
6558 }
6559
6560 auto TargetIDDirective = getLexer().getTok().getStringContents();
6561 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6562 return Error(getParser().getTok().getLoc(), "target id must match options");
6563
6564 getTargetStreamer().EmitISAVersion();
6565 Lex();
6566
6567 return false;
6568}
6569
6570bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6571 assert(isHsaAbi(getSTI()));
6572
6573 std::string HSAMetadataString;
6574 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6575 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6576 return true;
6577
6578 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6579 return Error(getLoc(), "invalid HSA metadata");
6580
6581 return false;
6582}
6583
6584/// Common code to parse out a block of text (typically YAML) between start and
6585/// end directives.
6586bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6587 const char *AssemblerDirectiveEnd,
6588 std::string &CollectString) {
6589
6590 raw_string_ostream CollectStream(CollectString);
6591
6592 getLexer().setSkipSpace(false);
6593
6594 bool FoundEnd = false;
6595 while (!isToken(AsmToken::Eof)) {
6596 while (isToken(AsmToken::Space)) {
6597 CollectStream << getTokenStr();
6598 Lex();
6599 }
6600
6601 if (trySkipId(AssemblerDirectiveEnd)) {
6602 FoundEnd = true;
6603 break;
6604 }
6605
6606 CollectStream << Parser.parseStringToEndOfStatement()
6607 << getContext().getAsmInfo()->getSeparatorString();
6608
6609 Parser.eatToEndOfStatement();
6610 }
6611
6612 getLexer().setSkipSpace(true);
6613
6614 if (isToken(AsmToken::Eof) && !FoundEnd) {
6615 return TokError(Twine("expected directive ") +
6616 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6617 }
6618
6619 return false;
6620}
6621
6622/// Parse the assembler directive for new MsgPack-format PAL metadata.
6623bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6624 std::string String;
6625 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6626 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6627 return true;
6628
6629 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6630 if (!PALMetadata->setFromString(String))
6631 return Error(getLoc(), "invalid PAL metadata");
6632 return false;
6633}
6634
6635/// Parse the assembler directive for old linear-format PAL metadata.
6636bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6637 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6638 return Error(getLoc(),
6639 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6640 "not available on non-amdpal OSes")).str());
6641 }
6642
6643 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6644 PALMetadata->setLegacy();
6645 for (;;) {
6646 uint32_t Key, Value;
6647 if (ParseAsAbsoluteExpression(Key)) {
6648 return TokError(Twine("invalid value in ") +
6649 Twine(PALMD::AssemblerDirective));
6650 }
6651 if (!trySkipToken(AsmToken::Comma)) {
6652 return TokError(Twine("expected an even number of values in ") +
6653 Twine(PALMD::AssemblerDirective));
6654 }
6655 if (ParseAsAbsoluteExpression(Value)) {
6656 return TokError(Twine("invalid value in ") +
6657 Twine(PALMD::AssemblerDirective));
6658 }
6659 PALMetadata->setRegister(Key, Value);
6660 if (!trySkipToken(AsmToken::Comma))
6661 break;
6662 }
6663 return false;
6664}
6665
6666/// ParseDirectiveAMDGPULDS
6667/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
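/// Editorial example (the symbol name is made up, not from the original
/// source):
///   .amdgpu_lds lds_block, 4096, 16
/// reserves 4096 bytes of LDS for "lds_block" with 16-byte alignment; when the
/// alignment expression is omitted it defaults to 4, as implemented below.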
6668bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6669 if (getParser().checkForValidSection())
6670 return true;
6671
6672 StringRef Name;
6673 SMLoc NameLoc = getLoc();
6674 if (getParser().parseIdentifier(Name))
6675 return TokError("expected identifier in directive");
6676
6677 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6678 if (getParser().parseComma())
6679 return true;
6680
6681 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6682
6683 int64_t Size;
6684 SMLoc SizeLoc = getLoc();
6685 if (getParser().parseAbsoluteExpression(Size))
6686 return true;
6687 if (Size < 0)
6688 return Error(SizeLoc, "size must be non-negative");
6689 if (Size > LocalMemorySize)
6690 return Error(SizeLoc, "size is too large");
6691
6692 int64_t Alignment = 4;
6693 if (trySkipToken(AsmToken::Comma)) {
6694 SMLoc AlignLoc = getLoc();
6695 if (getParser().parseAbsoluteExpression(Alignment))
6696 return true;
6697 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6698 return Error(AlignLoc, "alignment must be a power of two");
6699
6700 // Alignment larger than the size of LDS is possible in theory, as long
6701 // as the linker manages to place the symbol at address 0, but we do want
6702 // to make sure the alignment fits nicely into a 32-bit integer.
6703 if (Alignment >= 1u << 31)
6704 return Error(AlignLoc, "alignment is too large");
6705 }
6706
6707 if (parseEOL())
6708 return true;
6709
6710 Symbol->redefineIfPossible();
6711 if (!Symbol->isUndefined())
6712 return Error(NameLoc, "invalid symbol redefinition");
6713
6714 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6715 return false;
6716}
6717
6718bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6719 StringRef IDVal = DirectiveID.getString();
6720
6721 if (isHsaAbi(getSTI())) {
6722 if (IDVal == ".amdhsa_kernel")
6723 return ParseDirectiveAMDHSAKernel();
6724
6725 if (IDVal == ".amdhsa_code_object_version")
6726 return ParseDirectiveAMDHSACodeObjectVersion();
6727
6728 // TODO: Restructure/combine with PAL metadata directive.
6729 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6730 return ParseDirectiveHSAMetadata();
6731 } else {
6732 if (IDVal == ".amd_kernel_code_t")
6733 return ParseDirectiveAMDKernelCodeT();
6734
6735 if (IDVal == ".amdgpu_hsa_kernel")
6736 return ParseDirectiveAMDGPUHsaKernel();
6737
6738 if (IDVal == ".amd_amdgpu_isa")
6739 return ParseDirectiveISAVersion();
6740
6741 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6742 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6743 Twine(" directive is "
6744 "not available on non-amdhsa OSes"))
6745 .str());
6746 }
6747 }
6748
6749 if (IDVal == ".amdgcn_target")
6750 return ParseDirectiveAMDGCNTarget();
6751
6752 if (IDVal == ".amdgpu_lds")
6753 return ParseDirectiveAMDGPULDS();
6754
6755 if (IDVal == PALMD::AssemblerDirectiveBegin)
6756 return ParseDirectivePALMetadataBegin();
6757
6758 if (IDVal == PALMD::AssemblerDirective)
6759 return ParseDirectivePALMetadata();
6760
6761 return true;
6762}
6763
6764bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6765 MCRegister Reg) {
6766 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6767 return isGFX9Plus();
6768
6769 // GFX10+ has 2 more SGPRs 104 and 105.
6770 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6771 return hasSGPR104_SGPR105();
6772
6773 switch (Reg.id()) {
6774 case SRC_SHARED_BASE_LO:
6775 case SRC_SHARED_BASE:
6776 case SRC_SHARED_LIMIT_LO:
6777 case SRC_SHARED_LIMIT:
6778 case SRC_PRIVATE_BASE_LO:
6779 case SRC_PRIVATE_BASE:
6780 case SRC_PRIVATE_LIMIT_LO:
6781 case SRC_PRIVATE_LIMIT:
6782 return isGFX9Plus();
6783 case SRC_FLAT_SCRATCH_BASE_LO:
6784 case SRC_FLAT_SCRATCH_BASE_HI:
6785 return hasGloballyAddressableScratch();
6786 case SRC_POPS_EXITING_WAVE_ID:
6787 return isGFX9Plus() && !isGFX11Plus();
6788 case TBA:
6789 case TBA_LO:
6790 case TBA_HI:
6791 case TMA:
6792 case TMA_LO:
6793 case TMA_HI:
6794 return !isGFX9Plus();
6795 case XNACK_MASK:
6796 case XNACK_MASK_LO:
6797 case XNACK_MASK_HI:
6798 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6799 case SGPR_NULL:
6800 return isGFX10Plus();
6801 case SRC_EXECZ:
6802 case SRC_VCCZ:
6803 return !isGFX11Plus();
6804 default:
6805 break;
6806 }
6807
6808 if (isCI())
6809 return true;
6810
6811 if (isSI() || isGFX10Plus()) {
6812 // No flat_scr on SI.
6813 // On GFX10Plus flat scratch is not a valid register operand and can only be
6814 // accessed with s_setreg/s_getreg.
6815 switch (Reg.id()) {
6816 case FLAT_SCR:
6817 case FLAT_SCR_LO:
6818 case FLAT_SCR_HI:
6819 return false;
6820 default:
6821 return true;
6822 }
6823 }
6824
6825 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6826 // SI/CI have.
6827 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6828 return hasSGPR102_SGPR103();
6829
6830 return true;
6831}
6832
6833ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6834 StringRef Mnemonic,
6835 OperandMode Mode) {
6836 ParseStatus Res = parseVOPD(Operands);
6837 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6838 return Res;
6839
6840 // Try to parse with a custom parser
6841 Res = MatchOperandParserImpl(Operands, Mnemonic);
6842
6843 // If we successfully parsed the operand or if there was an error parsing,
6844 // we are done.
6845 //
6846 // If we are parsing after we reach EndOfStatement then this means we
6847 // are appending default values to the Operands list. This is only done
6848 // by custom parser, so we shouldn't continue on to the generic parsing.
6849 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6850 return Res;
6851
6852 SMLoc RBraceLoc;
6853 SMLoc LBraceLoc = getLoc();
6854 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6855 unsigned Prefix = Operands.size();
6856
6857 for (;;) {
6858 auto Loc = getLoc();
6859 Res = parseReg(Operands);
6860 if (Res.isNoMatch())
6861 Error(Loc, "expected a register");
6862 if (!Res.isSuccess())
6863 return ParseStatus::Failure;
6864
6865 RBraceLoc = getLoc();
6866 if (trySkipToken(AsmToken::RBrac))
6867 break;
6868
6869 if (!skipToken(AsmToken::Comma,
6870 "expected a comma or a closing square bracket"))
6871 return ParseStatus::Failure;
6872 }
6873
6874 if (Operands.size() - Prefix > 1) {
6875 Operands.insert(Operands.begin() + Prefix,
6876 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6877 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6878 }
6879
6880 return ParseStatus::Success;
6881 }
6882
6883 return parseRegOrImm(Operands);
6884}
6885
6886StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6887 // Clear any forced encodings from the previous instruction.
6888 setForcedEncodingSize(0);
6889 setForcedDPP(false);
6890 setForcedSDWA(false);
6891
6892 if (Name.consume_back("_e64_dpp")) {
6893 setForcedDPP(true);
6894 setForcedEncodingSize(64);
6895 return Name;
6896 }
6897 if (Name.consume_back("_e64")) {
6898 setForcedEncodingSize(64);
6899 return Name;
6900 }
6901 if (Name.consume_back("_e32")) {
6902 setForcedEncodingSize(32);
6903 return Name;
6904 }
6905 if (Name.consume_back("_dpp")) {
6906 setForcedDPP(true);
6907 return Name;
6908 }
6909 if (Name.consume_back("_sdwa")) {
6910 setForcedSDWA(true);
6911 return Name;
6912 }
6913 return Name;
6914}
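// Editorial example (not part of the original source): "v_add_f32_e64" is
// returned as "v_add_f32" with a forced 64-bit encoding, "v_mov_b32_dpp" as
// "v_mov_b32" with DPP forced, and a mnemonic without a recognized suffix is
// returned unchanged.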
6915
6916static void applyMnemonicAliases(StringRef &Mnemonic,
6917 const FeatureBitset &Features,
6918 unsigned VariantID);
6919
6920bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6921 StringRef Name, SMLoc NameLoc,
6922 OperandVector &Operands) {
6923 // Add the instruction mnemonic
6924 Name = parseMnemonicSuffix(Name);
6925
6926 // If the target architecture uses MnemonicAlias, call it here to parse
6927 // operands correctly.
6928 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6929
6930 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6931
6932 bool IsMIMG = Name.starts_with("image_");
6933
6934 while (!trySkipToken(AsmToken::EndOfStatement)) {
6935 OperandMode Mode = OperandMode_Default;
6936 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6937 Mode = OperandMode_NSA;
6938 ParseStatus Res = parseOperand(Operands, Name, Mode);
6939
6940 if (!Res.isSuccess()) {
6941 checkUnsupportedInstruction(Name, NameLoc);
6942 if (!Parser.hasPendingError()) {
6943 // FIXME: use real operand location rather than the current location.
6944 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6945 : "not a valid operand.";
6946 Error(getLoc(), Msg);
6947 }
6948 while (!trySkipToken(AsmToken::EndOfStatement)) {
6949 lex();
6950 }
6951 return true;
6952 }
6953
6954 // Eat the comma or space if there is one.
6955 trySkipToken(AsmToken::Comma);
6956 }
6957
6958 return false;
6959}
6960
6961//===----------------------------------------------------------------------===//
6962// Utility functions
6963//===----------------------------------------------------------------------===//
6964
6965ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6966 OperandVector &Operands) {
6967 SMLoc S = getLoc();
6968 if (!trySkipId(Name))
6969 return ParseStatus::NoMatch;
6970
6971 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6972 return ParseStatus::Success;
6973}
6974
6975ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6976 int64_t &IntVal) {
6977
6978 if (!trySkipId(Prefix, AsmToken::Colon))
6979 return ParseStatus::NoMatch;
6980
6981 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6982}
6983
6984ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6985 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6986 std::function<bool(int64_t &)> ConvertResult) {
6987 SMLoc S = getLoc();
6988 int64_t Value = 0;
6989
6990 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6991 if (!Res.isSuccess())
6992 return Res;
6993
6994 if (ConvertResult && !ConvertResult(Value)) {
6995 Error(S, "invalid " + StringRef(Prefix) + " value.");
6996 }
6997
6998 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6999 return ParseStatus::Success;
7000}
7001
7002ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7003 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7004 bool (*ConvertResult)(int64_t &)) {
7005 SMLoc S = getLoc();
7006 if (!trySkipId(Prefix, AsmToken::Colon))
7007 return ParseStatus::NoMatch;
7008
7009 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
7010 return ParseStatus::Failure;
7011
7012 unsigned Val = 0;
7013 const unsigned MaxSize = 4;
7014
7015 // FIXME: How to verify the number of elements matches the number of src
7016 // operands?
7017 for (int I = 0; ; ++I) {
7018 int64_t Op;
7019 SMLoc Loc = getLoc();
7020 if (!parseExpr(Op))
7021 return ParseStatus::Failure;
7022
7023 if (Op != 0 && Op != 1)
7024 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7025
7026 Val |= (Op << I);
7027
7028 if (trySkipToken(AsmToken::RBrac))
7029 break;
7030
7031 if (I + 1 == MaxSize)
7032 return Error(getLoc(), "expected a closing square bracket");
7033
7034 if (!skipToken(AsmToken::Comma, "expected a comma"))
7035 return ParseStatus::Failure;
7036 }
7037
7038 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7039 return ParseStatus::Success;
7040}
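// Editorial example (illustrative, not from the original source): an operand
// such as "op_sel:[0,1,1,0]" is parsed by the routine above into the bitmask
// 0b0110, with bit I set when element I is 1; at most four 0/1 elements are
// accepted.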
7041
7042ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7043 OperandVector &Operands,
7044 AMDGPUOperand::ImmTy ImmTy) {
7045 int64_t Bit;
7046 SMLoc S = getLoc();
7047
7048 if (trySkipId(Name)) {
7049 Bit = 1;
7050 } else if (trySkipId("no", Name)) {
7051 Bit = 0;
7052 } else {
7053 return ParseStatus::NoMatch;
7054 }
7055
7056 if (Name == "r128" && !hasMIMG_R128())
7057 return Error(S, "r128 modifier is not supported on this GPU");
7058 if (Name == "a16" && !hasA16())
7059 return Error(S, "a16 modifier is not supported on this GPU");
7060
7061 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7062 ImmTy = AMDGPUOperand::ImmTyR128A16;
7063
7064 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7065 return ParseStatus::Success;
7066}
7067
7068unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7069 bool &Disabling) const {
7070 Disabling = Id.consume_front("no");
7071
7072 if (isGFX940() && !Mnemo.starts_with("s_")) {
7073 return StringSwitch<unsigned>(Id)
7074 .Case("nt", AMDGPU::CPol::NT)
7075 .Case("sc0", AMDGPU::CPol::SC0)
7076 .Case("sc1", AMDGPU::CPol::SC1)
7077 .Default(0);
7078 }
7079
7080 return StringSwitch<unsigned>(Id)
7081 .Case("dlc", AMDGPU::CPol::DLC)
7082 .Case("glc", AMDGPU::CPol::GLC)
7083 .Case("scc", AMDGPU::CPol::SCC)
7084 .Case("slc", AMDGPU::CPol::SLC)
7085 .Default(0);
7086}
7087
7088ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7089 if (isGFX12Plus()) {
7090 SMLoc StringLoc = getLoc();
7091
7092 int64_t CPolVal = 0;
7093 ParseStatus ResTH = ParseStatus::NoMatch;
7094 ParseStatus ResScope = ParseStatus::NoMatch;
7095 ParseStatus ResNV = ParseStatus::NoMatch;
7096 ParseStatus ResScal = ParseStatus::NoMatch;
7097
7098 for (;;) {
7099 if (ResTH.isNoMatch()) {
7100 int64_t TH;
7101 ResTH = parseTH(Operands, TH);
7102 if (ResTH.isFailure())
7103 return ResTH;
7104 if (ResTH.isSuccess()) {
7105 CPolVal |= TH;
7106 continue;
7107 }
7108 }
7109
7110 if (ResScope.isNoMatch()) {
7111 int64_t Scope;
7112 ResScope = parseScope(Operands, Scope);
7113 if (ResScope.isFailure())
7114 return ResScope;
7115 if (ResScope.isSuccess()) {
7116 CPolVal |= Scope;
7117 continue;
7118 }
7119 }
7120
7121      // The NV bit exists on GFX12+ but only has an effect starting from GFX1250.
7122      // Allow parsing on all GFX12 and fail during validation for better

7123 // diagnostics.
7124 if (ResNV.isNoMatch()) {
7125 if (trySkipId("nv")) {
7126 ResNV = ParseStatus::Success;
7127 CPolVal |= CPol::NV;
7128 continue;
7129 } else if (trySkipId("no", "nv")) {
7130 ResNV = ParseStatus::Success;
7131 continue;
7132 }
7133 }
7134
7135 if (ResScal.isNoMatch()) {
7136 if (trySkipId("scale_offset")) {
7137 ResScal = ParseStatus::Success;
7138 CPolVal |= CPol::SCAL;
7139 continue;
7140 } else if (trySkipId("no", "scale_offset")) {
7141 ResScal = ParseStatus::Success;
7142 continue;
7143 }
7144 }
7145
7146 break;
7147 }
7148
7149 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7150 ResScal.isNoMatch())
7151 return ParseStatus::NoMatch;
7152
7153 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7154 AMDGPUOperand::ImmTyCPol));
7155 return ParseStatus::Success;
7156 }
7157
7158 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7159 SMLoc OpLoc = getLoc();
7160 unsigned Enabled = 0, Seen = 0;
7161 for (;;) {
7162 SMLoc S = getLoc();
7163 bool Disabling;
7164 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7165 if (!CPol)
7166 break;
7167
7168 lex();
7169
7170 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7171 return Error(S, "dlc modifier is not supported on this GPU");
7172
7173 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7174 return Error(S, "scc modifier is not supported on this GPU");
7175
7176 if (Seen & CPol)
7177 return Error(S, "duplicate cache policy modifier");
7178
7179 if (!Disabling)
7180 Enabled |= CPol;
7181
7182 Seen |= CPol;
7183 }
7184
7185 if (!Seen)
7186 return ParseStatus::NoMatch;
7187
7188 Operands.push_back(
7189 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7190 return ParseStatus::Success;
7191}
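// A hedged illustration (example operands, not from the original file): on a
// pre-GFX12 target the loop above folds "glc slc" into a single ImmTyCPol
// immediate with Enabled = GLC | SLC, while a negated form such as "noglc"
// only records the bit in Seen so that duplicates are still diagnosed.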
7192
7193ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7194 int64_t &Scope) {
7195 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7196                                    CPol::SCOPE_DEV, CPol::SCOPE_SYS};
7197
7198 ParseStatus Res = parseStringOrIntWithPrefix(
7199 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7200 Scope);
7201
7202 if (Res.isSuccess())
7203 Scope = Scopes[Scope];
7204
7205 return Res;
7206}
7207
7208ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7209 TH = AMDGPU::CPol::TH_RT; // default
7210
7211 StringRef Value;
7212 SMLoc StringLoc;
7213 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7214 if (!Res.isSuccess())
7215 return Res;
7216
7217 if (Value == "TH_DEFAULT")
7218    TH = AMDGPU::CPol::TH_RT;
7219 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7220 Value == "TH_LOAD_NT_WB") {
7221 return Error(StringLoc, "invalid th value");
7222 } else if (Value.consume_front("TH_ATOMIC_")) {
7223    TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
7224 } else if (Value.consume_front("TH_LOAD_")) {
7225    TH = AMDGPU::CPol::TH_TYPE_LOAD;
7226 } else if (Value.consume_front("TH_STORE_")) {
7227    TH = AMDGPU::CPol::TH_TYPE_STORE;
7228 } else {
7229 return Error(StringLoc, "invalid th value");
7230 }
7231
7232 if (Value == "BYPASS")
7233    TH |= AMDGPU::CPol::TH_REAL_BYPASS;
7234
7235 if (TH != 0) {
7236    if (TH == AMDGPU::CPol::TH_TYPE_ATOMIC)
7237 TH |= StringSwitch<int64_t>(Value)
7238 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7239 .Case("RT", AMDGPU::CPol::TH_RT)
7240 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7241 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7242 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7243                                   AMDGPU::CPol::TH_ATOMIC_RETURN)
7244 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7245 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7246                                    AMDGPU::CPol::TH_ATOMIC_NT)
7247 .Default(0xffffffff);
7248 else
7249 TH |= StringSwitch<int64_t>(Value)
7250 .Case("RT", AMDGPU::CPol::TH_RT)
7251 .Case("NT", AMDGPU::CPol::TH_NT)
7252 .Case("HT", AMDGPU::CPol::TH_HT)
7253 .Case("LU", AMDGPU::CPol::TH_LU)
7254 .Case("WB", AMDGPU::CPol::TH_WB)
7255 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7256 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7257 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7258 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7259 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7260 .Default(0xffffffff);
7261 }
7262
7263 if (TH == 0xffffffff)
7264 return Error(StringLoc, "invalid th value");
7265
7266 return ParseStatus::Success;
7267}
7268
7269static void
7270 addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
7271 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7272 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7273 std::optional<unsigned> InsertAt = std::nullopt) {
7274 auto i = OptionalIdx.find(ImmT);
7275 if (i != OptionalIdx.end()) {
7276 unsigned Idx = i->second;
7277 const AMDGPUOperand &Op =
7278 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7279 if (InsertAt)
7280 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7281 else
7282 Op.addImmOperands(Inst, 1);
7283 } else {
7284 if (InsertAt.has_value())
7285 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7286 else
7287      Inst.addOperand(MCOperand::createImm(Default));
7288 }
7289}
7290
7291ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7292 StringRef &Value,
7293 SMLoc &StringLoc) {
7294 if (!trySkipId(Prefix, AsmToken::Colon))
7295 return ParseStatus::NoMatch;
7296
7297 StringLoc = getLoc();
7298 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7299                                                : ParseStatus::Failure;
7300}
7301
7302ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7303 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7304 int64_t &IntVal) {
7305 if (!trySkipId(Name, AsmToken::Colon))
7306 return ParseStatus::NoMatch;
7307
7308 SMLoc StringLoc = getLoc();
7309
7310 StringRef Value;
7311 if (isToken(AsmToken::Identifier)) {
7312 Value = getTokenStr();
7313 lex();
7314
7315 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7316 if (Value == Ids[IntVal])
7317 break;
7318 } else if (!parseExpr(IntVal))
7319 return ParseStatus::Failure;
7320
7321 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7322 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7323
7324 return ParseStatus::Success;
7325}
7326
7327ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7328 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7329 AMDGPUOperand::ImmTy Type) {
7330 SMLoc S = getLoc();
7331 int64_t IntVal;
7332
7333 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7334 if (Res.isSuccess())
7335 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7336
7337 return Res;
7338}
7339
7340//===----------------------------------------------------------------------===//
7341// MTBUF format
7342//===----------------------------------------------------------------------===//
7343
7344bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7345 int64_t MaxVal,
7346 int64_t &Fmt) {
7347 int64_t Val;
7348 SMLoc Loc = getLoc();
7349
7350 auto Res = parseIntWithPrefix(Pref, Val);
7351 if (Res.isFailure())
7352 return false;
7353 if (Res.isNoMatch())
7354 return true;
7355
7356 if (Val < 0 || Val > MaxVal) {
7357 Error(Loc, Twine("out of range ", StringRef(Pref)));
7358 return false;
7359 }
7360
7361 Fmt = Val;
7362 return true;
7363}
7364
7365ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7366 AMDGPUOperand::ImmTy ImmTy) {
7367 const char *Pref = "index_key";
7368 int64_t ImmVal = 0;
7369 SMLoc Loc = getLoc();
7370 auto Res = parseIntWithPrefix(Pref, ImmVal);
7371 if (!Res.isSuccess())
7372 return Res;
7373
7374 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7375 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7376 (ImmVal < 0 || ImmVal > 1))
7377 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7378
7379 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7380 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7381
7382 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7383 return ParseStatus::Success;
7384}
7385
7386ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7387 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7388}
7389
7390ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7391 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7392}
7393
7394ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7395 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7396}
7397
7398ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7399 StringRef Name,
7400 AMDGPUOperand::ImmTy Type) {
7401 return parseStringOrIntWithPrefix(Operands, Name,
7402 {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
7403 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
7404 "MATRIX_FMT_FP4"},
7405 Type);
7406}
7407
7408ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7409 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7410 AMDGPUOperand::ImmTyMatrixAFMT);
7411}
7412
7413ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7414 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7415 AMDGPUOperand::ImmTyMatrixBFMT);
7416}
7417
7418ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7419 StringRef Name,
7420 AMDGPUOperand::ImmTy Type) {
7421 return parseStringOrIntWithPrefix(
7422 Operands, Name, {"MATRIX_SCALE_ROW0", "MATRIX_SCALE_ROW1"}, Type);
7423}
7424
7425ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7426 return tryParseMatrixScale(Operands, "matrix_a_scale",
7427 AMDGPUOperand::ImmTyMatrixAScale);
7428}
7429
7430ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7431 return tryParseMatrixScale(Operands, "matrix_b_scale",
7432 AMDGPUOperand::ImmTyMatrixBScale);
7433}
7434
7435ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7436 StringRef Name,
7437 AMDGPUOperand::ImmTy Type) {
7438 return parseStringOrIntWithPrefix(
7439 Operands, Name,
7440 {"MATRIX_SCALE_FMT_E8", "MATRIX_SCALE_FMT_E5M3", "MATRIX_SCALE_FMT_E4M3"},
7441 Type);
7442}
7443
7444ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7445 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7446 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7447}
7448
7449ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7450 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7451 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7452}
7453
7454// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7455// values to live in a joint format operand in the MCInst encoding.
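// A hedged illustration (example values, not from the original file):
// "dfmt:14, nfmt:7" and "nfmt:7, dfmt:14" both produce encodeDfmtNfmt(14, 7);
// if only one half is written, the other falls back to DFMT_DEFAULT or
// NFMT_DEFAULT below.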
7456ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7457 using namespace llvm::AMDGPU::MTBUFFormat;
7458
7459 int64_t Dfmt = DFMT_UNDEF;
7460 int64_t Nfmt = NFMT_UNDEF;
7461
7462 // dfmt and nfmt can appear in either order, and each is optional.
7463 for (int I = 0; I < 2; ++I) {
7464 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7465 return ParseStatus::Failure;
7466
7467 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7468 return ParseStatus::Failure;
7469
7470 // Skip optional comma between dfmt/nfmt
7471 // but guard against 2 commas following each other.
7472 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7473 !peekToken().is(AsmToken::Comma)) {
7474 trySkipToken(AsmToken::Comma);
7475 }
7476 }
7477
7478 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7479 return ParseStatus::NoMatch;
7480
7481 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7482 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7483
7484 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7485 return ParseStatus::Success;
7486}
7487
7488ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7489 using namespace llvm::AMDGPU::MTBUFFormat;
7490
7491 int64_t Fmt = UFMT_UNDEF;
7492
7493 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7494 return ParseStatus::Failure;
7495
7496 if (Fmt == UFMT_UNDEF)
7497 return ParseStatus::NoMatch;
7498
7499 Format = Fmt;
7500 return ParseStatus::Success;
7501}
7502
7503bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7504 int64_t &Nfmt,
7505 StringRef FormatStr,
7506 SMLoc Loc) {
7507 using namespace llvm::AMDGPU::MTBUFFormat;
7508 int64_t Format;
7509
7510 Format = getDfmt(FormatStr);
7511 if (Format != DFMT_UNDEF) {
7512 Dfmt = Format;
7513 return true;
7514 }
7515
7516 Format = getNfmt(FormatStr, getSTI());
7517 if (Format != NFMT_UNDEF) {
7518 Nfmt = Format;
7519 return true;
7520 }
7521
7522 Error(Loc, "unsupported format");
7523 return false;
7524}
7525
7526ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7527 SMLoc FormatLoc,
7528 int64_t &Format) {
7529 using namespace llvm::AMDGPU::MTBUFFormat;
7530
7531 int64_t Dfmt = DFMT_UNDEF;
7532 int64_t Nfmt = NFMT_UNDEF;
7533 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7534 return ParseStatus::Failure;
7535
7536 if (trySkipToken(AsmToken::Comma)) {
7537 StringRef Str;
7538 SMLoc Loc = getLoc();
7539 if (!parseId(Str, "expected a format string") ||
7540 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7541 return ParseStatus::Failure;
7542 if (Dfmt == DFMT_UNDEF)
7543 return Error(Loc, "duplicate numeric format");
7544 if (Nfmt == NFMT_UNDEF)
7545 return Error(Loc, "duplicate data format");
7546 }
7547
7548 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7549 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7550
7551 if (isGFX10Plus()) {
7552 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7553 if (Ufmt == UFMT_UNDEF)
7554 return Error(FormatLoc, "unsupported format");
7555 Format = Ufmt;
7556 } else {
7557 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7558 }
7559
7560 return ParseStatus::Success;
7561}
7562
7563ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7564 SMLoc Loc,
7565 int64_t &Format) {
7566 using namespace llvm::AMDGPU::MTBUFFormat;
7567
7568 auto Id = getUnifiedFormat(FormatStr, getSTI());
7569 if (Id == UFMT_UNDEF)
7570 return ParseStatus::NoMatch;
7571
7572 if (!isGFX10Plus())
7573 return Error(Loc, "unified format is not supported on this GPU");
7574
7575 Format = Id;
7576 return ParseStatus::Success;
7577}
7578
7579ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7580 using namespace llvm::AMDGPU::MTBUFFormat;
7581 SMLoc Loc = getLoc();
7582
7583 if (!parseExpr(Format))
7584 return ParseStatus::Failure;
7585 if (!isValidFormatEncoding(Format, getSTI()))
7586 return Error(Loc, "out of range format");
7587
7588 return ParseStatus::Success;
7589}
7590
7591ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7592 using namespace llvm::AMDGPU::MTBUFFormat;
7593
7594 if (!trySkipId("format", AsmToken::Colon))
7595 return ParseStatus::NoMatch;
7596
7597 if (trySkipToken(AsmToken::LBrac)) {
7598 StringRef FormatStr;
7599 SMLoc Loc = getLoc();
7600 if (!parseId(FormatStr, "expected a format string"))
7601 return ParseStatus::Failure;
7602
7603 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7604 if (Res.isNoMatch())
7605 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7606 if (!Res.isSuccess())
7607 return Res;
7608
7609 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7610 return ParseStatus::Failure;
7611
7612 return ParseStatus::Success;
7613 }
7614
7615 return parseNumericFormat(Format);
7616}
7617
7618ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7619 using namespace llvm::AMDGPU::MTBUFFormat;
7620
7621 int64_t Format = getDefaultFormatEncoding(getSTI());
7622 ParseStatus Res;
7623 SMLoc Loc = getLoc();
7624
7625 // Parse legacy format syntax.
7626 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7627 if (Res.isFailure())
7628 return Res;
7629
7630 bool FormatFound = Res.isSuccess();
7631
7632 Operands.push_back(
7633 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7634
7635 if (FormatFound)
7636 trySkipToken(AsmToken::Comma);
7637
7638 if (isToken(AsmToken::EndOfStatement)) {
7639 // We are expecting an soffset operand,
7640    // but let the matcher handle the error.
7641 return ParseStatus::Success;
7642 }
7643
7644 // Parse soffset.
7645 Res = parseRegOrImm(Operands);
7646 if (!Res.isSuccess())
7647 return Res;
7648
7649 trySkipToken(AsmToken::Comma);
7650
7651 if (!FormatFound) {
7652 Res = parseSymbolicOrNumericFormat(Format);
7653 if (Res.isFailure())
7654 return Res;
7655 if (Res.isSuccess()) {
7656 auto Size = Operands.size();
7657 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7658 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7659 Op.setImm(Format);
7660 }
7661 return ParseStatus::Success;
7662 }
7663
7664 if (isId("format") && peekToken().is(AsmToken::Colon))
7665 return Error(getLoc(), "duplicate format");
7666 return ParseStatus::Success;
7667}
7668
7669ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7670 ParseStatus Res =
7671 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7672 if (Res.isNoMatch()) {
7673 Res = parseIntWithPrefix("inst_offset", Operands,
7674 AMDGPUOperand::ImmTyInstOffset);
7675 }
7676 return Res;
7677}
7678
7679ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7680 ParseStatus Res =
7681 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7682 if (Res.isNoMatch())
7683 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7684 return Res;
7685}
7686
7687ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7688 ParseStatus Res =
7689 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7690 if (Res.isNoMatch()) {
7691 Res =
7692 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7693 }
7694 return Res;
7695}
7696
7697//===----------------------------------------------------------------------===//
7698// Exp
7699//===----------------------------------------------------------------------===//
7700
7701void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7702 OptionalImmIndexMap OptionalIdx;
7703
7704 unsigned OperandIdx[4];
7705 unsigned EnMask = 0;
7706 int SrcIdx = 0;
7707
7708 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7709 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7710
7711 // Add the register arguments
7712 if (Op.isReg()) {
7713 assert(SrcIdx < 4);
7714 OperandIdx[SrcIdx] = Inst.size();
7715 Op.addRegOperands(Inst, 1);
7716 ++SrcIdx;
7717 continue;
7718 }
7719
7720 if (Op.isOff()) {
7721 assert(SrcIdx < 4);
7722 OperandIdx[SrcIdx] = Inst.size();
7723 Inst.addOperand(MCOperand::createReg(MCRegister()));
7724 ++SrcIdx;
7725 continue;
7726 }
7727
7728 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7729 Op.addImmOperands(Inst, 1);
7730 continue;
7731 }
7732
7733 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7734 continue;
7735
7736 // Handle optional arguments
7737 OptionalIdx[Op.getImmTy()] = i;
7738 }
7739
7740 assert(SrcIdx == 4);
7741
7742 bool Compr = false;
7743 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7744 Compr = true;
7745 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7746 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7747 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7748 }
7749
7750 for (auto i = 0; i < SrcIdx; ++i) {
7751 if (Inst.getOperand(OperandIdx[i]).getReg()) {
7752 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7753 }
7754 }
7755
7756 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7757 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7758
7759 Inst.addOperand(MCOperand::createImm(EnMask));
7760}
7761
7762//===----------------------------------------------------------------------===//
7763// s_waitcnt
7764//===----------------------------------------------------------------------===//
7765
7766static bool
7767 encodeCnt(
7768 const AMDGPU::IsaVersion ISA,
7769 int64_t &IntVal,
7770 int64_t CntVal,
7771 bool Saturate,
7772 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7773 unsigned (*decode)(const IsaVersion &Version, unsigned))
7774{
7775 bool Failed = false;
7776
7777 IntVal = encode(ISA, IntVal, CntVal);
7778 if (CntVal != decode(ISA, IntVal)) {
7779 if (Saturate) {
7780 IntVal = encode(ISA, IntVal, -1);
7781 } else {
7782 Failed = true;
7783 }
7784 }
7785 return Failed;
7786}
7787
7788bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7789
7790 SMLoc CntLoc = getLoc();
7791 StringRef CntName = getTokenStr();
7792
7793 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7794 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7795 return false;
7796
7797 int64_t CntVal;
7798 SMLoc ValLoc = getLoc();
7799 if (!parseExpr(CntVal))
7800 return false;
7801
7802 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7803
7804 bool Failed = true;
7805 bool Sat = CntName.ends_with("_sat");
7806
7807 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7808 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7809 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7810 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7811 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7812 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7813 } else {
7814 Error(CntLoc, "invalid counter name " + CntName);
7815 return false;
7816 }
7817
7818 if (Failed) {
7819 Error(ValLoc, "too large value for " + CntName);
7820 return false;
7821 }
7822
7823 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7824 return false;
7825
7826 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7827 if (isToken(AsmToken::EndOfStatement)) {
7828 Error(getLoc(), "expected a counter name");
7829 return false;
7830 }
7831 }
7832
7833 return true;
7834}
7835
7836ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7837 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7838 int64_t Waitcnt = getWaitcntBitMask(ISA);
7839 SMLoc S = getLoc();
7840
7841 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7842 while (!isToken(AsmToken::EndOfStatement)) {
7843 if (!parseCnt(Waitcnt))
7844 return ParseStatus::Failure;
7845 }
7846 } else {
7847 if (!parseExpr(Waitcnt))
7848 return ParseStatus::Failure;
7849 }
7850
7851 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7852 return ParseStatus::Success;
7853}
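// A hedged illustration (example syntax, not from the original file): both
//   s_waitcnt 0
//   s_waitcnt vmcnt(0) & lgkmcnt(0)
// are accepted above; each parseCnt() call folds one named counter into the
// combined mask initialized from getWaitcntBitMask(ISA).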
7854
7855bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7856 SMLoc FieldLoc = getLoc();
7857 StringRef FieldName = getTokenStr();
7858 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7859 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7860 return false;
7861
7862 SMLoc ValueLoc = getLoc();
7863 StringRef ValueName = getTokenStr();
7864 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7865 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7866 return false;
7867
7868 unsigned Shift;
7869 if (FieldName == "instid0") {
7870 Shift = 0;
7871 } else if (FieldName == "instskip") {
7872 Shift = 4;
7873 } else if (FieldName == "instid1") {
7874 Shift = 7;
7875 } else {
7876 Error(FieldLoc, "invalid field name " + FieldName);
7877 return false;
7878 }
7879
7880 int Value;
7881 if (Shift == 4) {
7882 // Parse values for instskip.
7883 Value = StringSwitch<int>(ValueName)
7884 .Case("SAME", 0)
7885 .Case("NEXT", 1)
7886 .Case("SKIP_1", 2)
7887 .Case("SKIP_2", 3)
7888 .Case("SKIP_3", 4)
7889 .Case("SKIP_4", 5)
7890 .Default(-1);
7891 } else {
7892 // Parse values for instid0 and instid1.
7893 Value = StringSwitch<int>(ValueName)
7894 .Case("NO_DEP", 0)
7895 .Case("VALU_DEP_1", 1)
7896 .Case("VALU_DEP_2", 2)
7897 .Case("VALU_DEP_3", 3)
7898 .Case("VALU_DEP_4", 4)
7899 .Case("TRANS32_DEP_1", 5)
7900 .Case("TRANS32_DEP_2", 6)
7901 .Case("TRANS32_DEP_3", 7)
7902 .Case("FMA_ACCUM_CYCLE_1", 8)
7903 .Case("SALU_CYCLE_1", 9)
7904 .Case("SALU_CYCLE_2", 10)
7905 .Case("SALU_CYCLE_3", 11)
7906 .Default(-1);
7907 }
7908 if (Value < 0) {
7909 Error(ValueLoc, "invalid value name " + ValueName);
7910 return false;
7911 }
7912
7913 Delay |= Value << Shift;
7914 return true;
7915}
7916
7917ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7918 int64_t Delay = 0;
7919 SMLoc S = getLoc();
7920
7921 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7922 do {
7923 if (!parseDelay(Delay))
7924 return ParseStatus::Failure;
7925 } while (trySkipToken(AsmToken::Pipe));
7926 } else {
7927 if (!parseExpr(Delay))
7928 return ParseStatus::Failure;
7929 }
7930
7931 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7932 return ParseStatus::Success;
7933}
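// A hedged illustration (example syntax, not from the original file):
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
// parseDelay() packs instid0, instskip and instid1 at bit offsets 0, 4 and 7
// of the resulting immediate; a plain integer expression is also accepted.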
7934
7935bool
7936AMDGPUOperand::isSWaitCnt() const {
7937 return isImm();
7938}
7939
7940bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7941
7942//===----------------------------------------------------------------------===//
7943// DepCtr
7944//===----------------------------------------------------------------------===//
7945
7946void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7947 StringRef DepCtrName) {
7948 switch (ErrorId) {
7949 case OPR_ID_UNKNOWN:
7950 Error(Loc, Twine("invalid counter name ", DepCtrName));
7951 return;
7952 case OPR_ID_UNSUPPORTED:
7953 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7954 return;
7955 case OPR_ID_DUPLICATE:
7956 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7957 return;
7958 case OPR_VAL_INVALID:
7959 Error(Loc, Twine("invalid value for ", DepCtrName));
7960 return;
7961 default:
7962 assert(false);
7963 }
7964}
7965
7966bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7967
7968 using namespace llvm::AMDGPU::DepCtr;
7969
7970 SMLoc DepCtrLoc = getLoc();
7971 StringRef DepCtrName = getTokenStr();
7972
7973 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7974 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7975 return false;
7976
7977 int64_t ExprVal;
7978 if (!parseExpr(ExprVal))
7979 return false;
7980
7981 unsigned PrevOprMask = UsedOprMask;
7982 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7983
7984 if (CntVal < 0) {
7985 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7986 return false;
7987 }
7988
7989 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7990 return false;
7991
7992 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7993 if (isToken(AsmToken::EndOfStatement)) {
7994 Error(getLoc(), "expected a counter name");
7995 return false;
7996 }
7997 }
7998
7999 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8000 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8001 return true;
8002}
8003
8004ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8005 using namespace llvm::AMDGPU::DepCtr;
8006
8007 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
8008 SMLoc Loc = getLoc();
8009
8010 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8011 unsigned UsedOprMask = 0;
8012 while (!isToken(AsmToken::EndOfStatement)) {
8013 if (!parseDepCtr(DepCtr, UsedOprMask))
8014 return ParseStatus::Failure;
8015 }
8016 } else {
8017 if (!parseExpr(DepCtr))
8018 return ParseStatus::Failure;
8019 }
8020
8021 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8022 return ParseStatus::Success;
8023}
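// A hedged illustration (example syntax, not from the original file):
//   s_waitcnt_depctr depctr_va_vdst(0) & depctr_sa_sdst(0)
// Each parseDepCtr() call patches one named field of the default encoding
// obtained from getDefaultDepCtrEncoding(), rejecting duplicate counters.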
8024
8025bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8026
8027//===----------------------------------------------------------------------===//
8028// hwreg
8029//===----------------------------------------------------------------------===//
8030
8031ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8032 OperandInfoTy &Offset,
8033 OperandInfoTy &Width) {
8034 using namespace llvm::AMDGPU::Hwreg;
8035
8036 if (!trySkipId("hwreg", AsmToken::LParen))
8037 return ParseStatus::NoMatch;
8038
8039 // The register may be specified by name or using a numeric code
8040 HwReg.Loc = getLoc();
8041 if (isToken(AsmToken::Identifier) &&
8042 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8043 HwReg.IsSymbolic = true;
8044 lex(); // skip register name
8045 } else if (!parseExpr(HwReg.Val, "a register name")) {
8046 return ParseStatus::Failure;
8047 }
8048
8049 if (trySkipToken(AsmToken::RParen))
8050 return ParseStatus::Success;
8051
8052 // parse optional params
8053 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8054 return ParseStatus::Failure;
8055
8056 Offset.Loc = getLoc();
8057 if (!parseExpr(Offset.Val))
8058 return ParseStatus::Failure;
8059
8060 if (!skipToken(AsmToken::Comma, "expected a comma"))
8061 return ParseStatus::Failure;
8062
8063 Width.Loc = getLoc();
8064 if (!parseExpr(Width.Val) ||
8065 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8066 return ParseStatus::Failure;
8067
8068 return ParseStatus::Success;
8069}
8070
8071ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8072 using namespace llvm::AMDGPU::Hwreg;
8073
8074 int64_t ImmVal = 0;
8075 SMLoc Loc = getLoc();
8076
8077 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8078 HwregId::Default);
8079 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8080 HwregOffset::Default);
8081 struct : StructuredOpField {
8082 using StructuredOpField::StructuredOpField;
8083 bool validate(AMDGPUAsmParser &Parser) const override {
8084 if (!isUIntN(Width, Val - 1))
8085 return Error(Parser, "only values from 1 to 32 are legal");
8086 return true;
8087 }
8088 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8089 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8090
8091 if (Res.isNoMatch())
8092 Res = parseHwregFunc(HwReg, Offset, Width);
8093
8094 if (Res.isSuccess()) {
8095 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8096 return ParseStatus::Failure;
8097 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8098 }
8099
8100 if (Res.isNoMatch() &&
8101 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8102    Res = ParseStatus::Success;
8103
8104 if (!Res.isSuccess())
8105 return ParseStatus::Failure;
8106
8107 if (!isUInt<16>(ImmVal))
8108 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8109 Operands.push_back(
8110 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8111 return ParseStatus::Success;
8112}
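// A hedged illustration (example syntax, not from the original file): the
// hwreg operand may be a macro, a structured immediate or a plain integer:
//   s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 0, 32)
//   s_getreg_b32 s0, {id: 3, offset: 0, size: 32}
// Both are folded into HwregEncoding::encode(id, offset, width) above.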
8113
8114bool AMDGPUOperand::isHwreg() const {
8115 return isImmTy(ImmTyHwreg);
8116}
8117
8118//===----------------------------------------------------------------------===//
8119// sendmsg
8120//===----------------------------------------------------------------------===//
8121
8122bool
8123AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8124 OperandInfoTy &Op,
8125 OperandInfoTy &Stream) {
8126 using namespace llvm::AMDGPU::SendMsg;
8127
8128 Msg.Loc = getLoc();
8129 if (isToken(AsmToken::Identifier) &&
8130 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8131 Msg.IsSymbolic = true;
8132 lex(); // skip message name
8133 } else if (!parseExpr(Msg.Val, "a message name")) {
8134 return false;
8135 }
8136
8137 if (trySkipToken(AsmToken::Comma)) {
8138 Op.IsDefined = true;
8139 Op.Loc = getLoc();
8140 if (isToken(AsmToken::Identifier) &&
8141 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8142            OPR_ID_UNKNOWN) {
8143 lex(); // skip operation name
8144 } else if (!parseExpr(Op.Val, "an operation name")) {
8145 return false;
8146 }
8147
8148 if (trySkipToken(AsmToken::Comma)) {
8149 Stream.IsDefined = true;
8150 Stream.Loc = getLoc();
8151 if (!parseExpr(Stream.Val))
8152 return false;
8153 }
8154 }
8155
8156 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8157}
8158
8159bool
8160AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8161 const OperandInfoTy &Op,
8162 const OperandInfoTy &Stream) {
8163 using namespace llvm::AMDGPU::SendMsg;
8164
8165  // Validation strictness depends on whether the message is specified
8166  // in a symbolic or in a numeric form. In the latter case
8167  // only the possibility of encoding is checked.
8168 bool Strict = Msg.IsSymbolic;
8169
8170 if (Strict) {
8171 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8172 Error(Msg.Loc, "specified message id is not supported on this GPU");
8173 return false;
8174 }
8175 } else {
8176 if (!isValidMsgId(Msg.Val, getSTI())) {
8177 Error(Msg.Loc, "invalid message id");
8178 return false;
8179 }
8180 }
8181 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8182 if (Op.IsDefined) {
8183 Error(Op.Loc, "message does not support operations");
8184 } else {
8185 Error(Msg.Loc, "missing message operation");
8186 }
8187 return false;
8188 }
8189 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8190 if (Op.Val == OPR_ID_UNSUPPORTED)
8191 Error(Op.Loc, "specified operation id is not supported on this GPU");
8192 else
8193 Error(Op.Loc, "invalid operation id");
8194 return false;
8195 }
8196 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8197 Stream.IsDefined) {
8198 Error(Stream.Loc, "message operation does not support streams");
8199 return false;
8200 }
8201 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8202 Error(Stream.Loc, "invalid message stream id");
8203 return false;
8204 }
8205 return true;
8206}
8207
8208ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8209 using namespace llvm::AMDGPU::SendMsg;
8210
8211 int64_t ImmVal = 0;
8212 SMLoc Loc = getLoc();
8213
8214 if (trySkipId("sendmsg", AsmToken::LParen)) {
8215 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8216 OperandInfoTy Op(OP_NONE_);
8217 OperandInfoTy Stream(STREAM_ID_NONE_);
8218 if (parseSendMsgBody(Msg, Op, Stream) &&
8219 validateSendMsg(Msg, Op, Stream)) {
8220 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8221 } else {
8222 return ParseStatus::Failure;
8223 }
8224 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8225 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8226 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8227 } else {
8228 return ParseStatus::Failure;
8229 }
8230
8231 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8232 return ParseStatus::Success;
8233}
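// A hedged illustration (example syntax, not from the original file):
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg 0x22
// Symbolic forms are validated strictly by validateSendMsg(); a raw integer
// only needs to fit in 16 bits.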
8234
8235bool AMDGPUOperand::isSendMsg() const {
8236 return isImmTy(ImmTySendMsg);
8237}
8238
8239//===----------------------------------------------------------------------===//
8240// v_interp
8241//===----------------------------------------------------------------------===//
8242
8243ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8244 StringRef Str;
8245 SMLoc S = getLoc();
8246
8247 if (!parseId(Str))
8248 return ParseStatus::NoMatch;
8249
8250 int Slot = StringSwitch<int>(Str)
8251 .Case("p10", 0)
8252 .Case("p20", 1)
8253 .Case("p0", 2)
8254 .Default(-1);
8255
8256 if (Slot == -1)
8257 return Error(S, "invalid interpolation slot");
8258
8259 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8260 AMDGPUOperand::ImmTyInterpSlot));
8261 return ParseStatus::Success;
8262}
8263
8264ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8265 StringRef Str;
8266 SMLoc S = getLoc();
8267
8268 if (!parseId(Str))
8269 return ParseStatus::NoMatch;
8270
8271 if (!Str.starts_with("attr"))
8272 return Error(S, "invalid interpolation attribute");
8273
8274 StringRef Chan = Str.take_back(2);
8275 int AttrChan = StringSwitch<int>(Chan)
8276 .Case(".x", 0)
8277 .Case(".y", 1)
8278 .Case(".z", 2)
8279 .Case(".w", 3)
8280 .Default(-1);
8281 if (AttrChan == -1)
8282 return Error(S, "invalid or missing interpolation attribute channel");
8283
8284 Str = Str.drop_back(2).drop_front(4);
8285
8286 uint8_t Attr;
8287 if (Str.getAsInteger(10, Attr))
8288 return Error(S, "invalid or missing interpolation attribute number");
8289
8290 if (Attr > 32)
8291 return Error(S, "out of bounds interpolation attribute number");
8292
8293 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8294
8295 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8296 AMDGPUOperand::ImmTyInterpAttr));
8297 Operands.push_back(AMDGPUOperand::CreateImm(
8298 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8299 return ParseStatus::Success;
8300}
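// A hedged illustration (example value, not from the original file):
// "attr31.w" yields Attr = 31 and AttrChan = 3; the attribute number may not
// exceed 32 and the channel suffix must be one of .x, .y, .z or .w.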
8301
8302//===----------------------------------------------------------------------===//
8303// exp
8304//===----------------------------------------------------------------------===//
8305
8306ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8307 using namespace llvm::AMDGPU::Exp;
8308
8309 StringRef Str;
8310 SMLoc S = getLoc();
8311
8312 if (!parseId(Str))
8313 return ParseStatus::NoMatch;
8314
8315 unsigned Id = getTgtId(Str);
8316 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8317 return Error(S, (Id == ET_INVALID)
8318 ? "invalid exp target"
8319 : "exp target is not supported on this GPU");
8320
8321 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8322 AMDGPUOperand::ImmTyExpTgt));
8323 return ParseStatus::Success;
8324}
8325
8326//===----------------------------------------------------------------------===//
8327// parser helpers
8328//===----------------------------------------------------------------------===//
8329
8330bool
8331AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8332 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8333}
8334
8335bool
8336AMDGPUAsmParser::isId(const StringRef Id) const {
8337 return isId(getToken(), Id);
8338}
8339
8340bool
8341AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8342 return getTokenKind() == Kind;
8343}
8344
8345StringRef AMDGPUAsmParser::getId() const {
8346 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8347}
8348
8349bool
8350AMDGPUAsmParser::trySkipId(const StringRef Id) {
8351 if (isId(Id)) {
8352 lex();
8353 return true;
8354 }
8355 return false;
8356}
8357
8358bool
8359AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8360 if (isToken(AsmToken::Identifier)) {
8361 StringRef Tok = getTokenStr();
8362 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8363 lex();
8364 return true;
8365 }
8366 }
8367 return false;
8368}
8369
8370bool
8371AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8372 if (isId(Id) && peekToken().is(Kind)) {
8373 lex();
8374 lex();
8375 return true;
8376 }
8377 return false;
8378}
8379
8380bool
8381AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8382 if (isToken(Kind)) {
8383 lex();
8384 return true;
8385 }
8386 return false;
8387}
8388
8389bool
8390AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8391 const StringRef ErrMsg) {
8392 if (!trySkipToken(Kind)) {
8393 Error(getLoc(), ErrMsg);
8394 return false;
8395 }
8396 return true;
8397}
8398
8399bool
8400AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8401 SMLoc S = getLoc();
8402
8403 const MCExpr *Expr;
8404 if (Parser.parseExpression(Expr))
8405 return false;
8406
8407 if (Expr->evaluateAsAbsolute(Imm))
8408 return true;
8409
8410 if (Expected.empty()) {
8411 Error(S, "expected absolute expression");
8412 } else {
8413 Error(S, Twine("expected ", Expected) +
8414 Twine(" or an absolute expression"));
8415 }
8416 return false;
8417}
8418
8419bool
8420AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8421 SMLoc S = getLoc();
8422
8423 const MCExpr *Expr;
8424 if (Parser.parseExpression(Expr))
8425 return false;
8426
8427 int64_t IntVal;
8428 if (Expr->evaluateAsAbsolute(IntVal)) {
8429 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8430 } else {
8431 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8432 }
8433 return true;
8434}
8435
8436bool
8437AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8438 if (isToken(AsmToken::String)) {
8439 Val = getToken().getStringContents();
8440 lex();
8441 return true;
8442 }
8443 Error(getLoc(), ErrMsg);
8444 return false;
8445}
8446
8447bool
8448AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8449 if (isToken(AsmToken::Identifier)) {
8450 Val = getTokenStr();
8451 lex();
8452 return true;
8453 }
8454 if (!ErrMsg.empty())
8455 Error(getLoc(), ErrMsg);
8456 return false;
8457}
8458
8459AsmToken
8460AMDGPUAsmParser::getToken() const {
8461 return Parser.getTok();
8462}
8463
8464AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8465 return isToken(AsmToken::EndOfStatement)
8466 ? getToken()
8467 : getLexer().peekTok(ShouldSkipSpace);
8468}
8469
8470void
8471AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8472 auto TokCount = getLexer().peekTokens(Tokens);
8473
8474 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8475 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8476}
8477
8478 AsmToken::TokenKind
8479AMDGPUAsmParser::getTokenKind() const {
8480 return getLexer().getKind();
8481}
8482
8483SMLoc
8484AMDGPUAsmParser::getLoc() const {
8485 return getToken().getLoc();
8486}
8487
8488StringRef
8489AMDGPUAsmParser::getTokenStr() const {
8490 return getToken().getString();
8491}
8492
8493void
8494AMDGPUAsmParser::lex() {
8495 Parser.Lex();
8496}
8497
8498SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8499 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8500}
8501
8502// Returns one of the given locations that comes later in the source.
8503SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8504 return a.getPointer() < b.getPointer() ? b : a;
8505}
8506
8507SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8508 int MCOpIdx) const {
8509 for (const auto &Op : Operands) {
8510 const auto TargetOp = static_cast<AMDGPUOperand &>(*Op);
8511 if (TargetOp.getMCOpIdx() == MCOpIdx)
8512 return TargetOp.getStartLoc();
8513 }
8514 llvm_unreachable("No such MC operand!");
8515}
8516
8517SMLoc
8518AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8519 const OperandVector &Operands) const {
8520 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8521 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8522 if (Test(Op))
8523 return Op.getStartLoc();
8524 }
8525 return getInstLoc(Operands);
8526}
8527
8528SMLoc
8529AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8530 const OperandVector &Operands) const {
8531 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8532 return getOperandLoc(Test, Operands);
8533}
8534
8535ParseStatus
8536AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8537 if (!trySkipToken(AsmToken::LCurly))
8538 return ParseStatus::NoMatch;
8539
8540 bool First = true;
8541 while (!trySkipToken(AsmToken::RCurly)) {
8542 if (!First &&
8543 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8544 return ParseStatus::Failure;
8545
8546 StringRef Id = getTokenStr();
8547 SMLoc IdLoc = getLoc();
8548 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8549 !skipToken(AsmToken::Colon, "colon expected"))
8550 return ParseStatus::Failure;
8551
8552 const auto *I =
8553 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8554 if (I == Fields.end())
8555 return Error(IdLoc, "unknown field");
8556 if ((*I)->IsDefined)
8557 return Error(IdLoc, "duplicate field");
8558
8559 // TODO: Support symbolic values.
8560 (*I)->Loc = getLoc();
8561 if (!parseExpr((*I)->Val))
8562 return ParseStatus::Failure;
8563 (*I)->IsDefined = true;
8564
8565 First = false;
8566 }
8567 return ParseStatus::Success;
8568}
8569
8570bool AMDGPUAsmParser::validateStructuredOpFields(
8571    ArrayRef<const StructuredOpField *> Fields) {
8572 return all_of(Fields, [this](const StructuredOpField *F) {
8573 return F->validate(*this);
8574 });
8575}
8576
8577//===----------------------------------------------------------------------===//
8578// swizzle
8579//===----------------------------------------------------------------------===//
8580
8581 LLVM_READNONE
8582static unsigned
8583encodeBitmaskPerm(const unsigned AndMask,
8584 const unsigned OrMask,
8585 const unsigned XorMask) {
8586 using namespace llvm::AMDGPU::Swizzle;
8587
8588 return BITMASK_PERM_ENC |
8589 (AndMask << BITMASK_AND_SHIFT) |
8590 (OrMask << BITMASK_OR_SHIFT) |
8591 (XorMask << BITMASK_XOR_SHIFT);
8592}
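// A hedged note (paraphrasing the hardware swizzle semantics, not from the
// original file): in BITMASK_PERM mode each lane reads from lane
// ((self & AndMask) | OrMask) ^ XorMask within its group of 32, which is the
// mapping the broadcast, swap and reverse helpers below are built on.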
8593
8594bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8595 const unsigned MaxVal,
8596 const Twine &ErrMsg, SMLoc &Loc) {
8597 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8598 return false;
8599 }
8600 Loc = getLoc();
8601 if (!parseExpr(Op)) {
8602 return false;
8603 }
8604 if (Op < MinVal || Op > MaxVal) {
8605 Error(Loc, ErrMsg);
8606 return false;
8607 }
8608
8609 return true;
8610}
8611
8612bool
8613AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8614 const unsigned MinVal,
8615 const unsigned MaxVal,
8616 const StringRef ErrMsg) {
8617 SMLoc Loc;
8618 for (unsigned i = 0; i < OpNum; ++i) {
8619 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8620 return false;
8621 }
8622
8623 return true;
8624}
8625
8626bool
8627AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8628 using namespace llvm::AMDGPU::Swizzle;
8629
8630 int64_t Lane[LANE_NUM];
8631 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8632 "expected a 2-bit lane id")) {
8633    Imm = QUAD_PERM_ENC;
8634 for (unsigned I = 0; I < LANE_NUM; ++I) {
8635 Imm |= Lane[I] << (LANE_SHIFT * I);
8636 }
8637 return true;
8638 }
8639 return false;
8640}
8641
8642bool
8643AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8644 using namespace llvm::AMDGPU::Swizzle;
8645
8646 SMLoc Loc;
8647 int64_t GroupSize;
8648 int64_t LaneIdx;
8649
8650 if (!parseSwizzleOperand(GroupSize,
8651 2, 32,
8652 "group size must be in the interval [2,32]",
8653 Loc)) {
8654 return false;
8655 }
8656 if (!isPowerOf2_64(GroupSize)) {
8657 Error(Loc, "group size must be a power of two");
8658 return false;
8659 }
8660 if (parseSwizzleOperand(LaneIdx,
8661 0, GroupSize - 1,
8662 "lane id must be in the interval [0,group size - 1]",
8663 Loc)) {
8664 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8665 return true;
8666 }
8667 return false;
8668}
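// A hedged illustration (example values, not from the original file):
// swizzle(BROADCAST, 8, 3) becomes encodeBitmaskPerm(31 - 8 + 1, 3, 0);
// AndMask = 0b11000 keeps the group base and OrMask selects lane 3 of each
// 8-lane group.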
8669
8670bool
8671AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8672 using namespace llvm::AMDGPU::Swizzle;
8673
8674 SMLoc Loc;
8675 int64_t GroupSize;
8676
8677 if (!parseSwizzleOperand(GroupSize,
8678 2, 32,
8679 "group size must be in the interval [2,32]",
8680 Loc)) {
8681 return false;
8682 }
8683 if (!isPowerOf2_64(GroupSize)) {
8684 Error(Loc, "group size must be a power of two");
8685 return false;
8686 }
8687
8688 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8689 return true;
8690}
8691
8692bool
8693AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8694 using namespace llvm::AMDGPU::Swizzle;
8695
8696 SMLoc Loc;
8697 int64_t GroupSize;
8698
8699 if (!parseSwizzleOperand(GroupSize,
8700 1, 16,
8701 "group size must be in the interval [1,16]",
8702 Loc)) {
8703 return false;
8704 }
8705 if (!isPowerOf2_64(GroupSize)) {
8706 Error(Loc, "group size must be a power of two");
8707 return false;
8708 }
8709
8710 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8711 return true;
8712}
8713
8714bool
8715AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8716 using namespace llvm::AMDGPU::Swizzle;
8717
8718 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8719 return false;
8720 }
8721
8722 StringRef Ctl;
8723 SMLoc StrLoc = getLoc();
8724 if (!parseString(Ctl)) {
8725 return false;
8726 }
8727 if (Ctl.size() != BITMASK_WIDTH) {
8728 Error(StrLoc, "expected a 5-character mask");
8729 return false;
8730 }
8731
8732 unsigned AndMask = 0;
8733 unsigned OrMask = 0;
8734 unsigned XorMask = 0;
8735
8736 for (size_t i = 0; i < Ctl.size(); ++i) {
8737 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8738 switch(Ctl[i]) {
8739 default:
8740 Error(StrLoc, "invalid mask");
8741 return false;
8742 case '0':
8743 break;
8744 case '1':
8745 OrMask |= Mask;
8746 break;
8747 case 'p':
8748 AndMask |= Mask;
8749 break;
8750 case 'i':
8751 AndMask |= Mask;
8752 XorMask |= Mask;
8753 break;
8754 }
8755 }
8756
8757 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8758 return true;
8759}
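// A hedged illustration (example mask, not from the original file):
// swizzle(BITMASK_PERM, "p0i10") gives AndMask = 0b10100, OrMask = 0b00010 and
// XorMask = 0b00100: Ctl[0] maps to bit 4, 'p' passes a bit through, 'i'
// inverts it, '1' forces it to one and '0' forces it to zero.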
8760
8761bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8762 using namespace llvm::AMDGPU::Swizzle;
8763
8764 if (!AMDGPU::isGFX9Plus(getSTI())) {
8765 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8766 return false;
8767 }
8768
8769 int64_t Swizzle;
8770 SMLoc Loc;
8771 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8772 "FFT swizzle must be in the interval [0," +
8773 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8774 Loc))
8775 return false;
8776
8777 Imm = FFT_MODE_ENC | Swizzle;
8778 return true;
8779}
8780
8781bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8782 using namespace llvm::AMDGPU::Swizzle;
8783
8784 if (!AMDGPU::isGFX9Plus(getSTI())) {
8785 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8786 return false;
8787 }
8788
8789 SMLoc Loc;
8790 int64_t Direction;
8791
8792 if (!parseSwizzleOperand(Direction, 0, 1,
8793 "direction must be 0 (left) or 1 (right)", Loc))
8794 return false;
8795
8796 int64_t RotateSize;
8797 if (!parseSwizzleOperand(
8798 RotateSize, 0, ROTATE_MAX_SIZE,
8799 "number of threads to rotate must be in the interval [0," +
8800 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8801 Loc))
8802 return false;
8803
8804  Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
8805 (RotateSize << ROTATE_SIZE_SHIFT);
8806 return true;
8807}
8808
8809bool
8810AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8811
8812 SMLoc OffsetLoc = getLoc();
8813
8814 if (!parseExpr(Imm, "a swizzle macro")) {
8815 return false;
8816 }
8817 if (!isUInt<16>(Imm)) {
8818 Error(OffsetLoc, "expected a 16-bit offset");
8819 return false;
8820 }
8821 return true;
8822}
8823
8824bool
8825AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8826 using namespace llvm::AMDGPU::Swizzle;
8827
8828  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
8829
8830 SMLoc ModeLoc = getLoc();
8831 bool Ok = false;
8832
8833 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8834 Ok = parseSwizzleQuadPerm(Imm);
8835 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8836 Ok = parseSwizzleBitmaskPerm(Imm);
8837 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8838 Ok = parseSwizzleBroadcast(Imm);
8839 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8840 Ok = parseSwizzleSwap(Imm);
8841 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8842 Ok = parseSwizzleReverse(Imm);
8843 } else if (trySkipId(IdSymbolic[ID_FFT])) {
8844 Ok = parseSwizzleFFT(Imm);
8845 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8846 Ok = parseSwizzleRotate(Imm);
8847 } else {
8848 Error(ModeLoc, "expected a swizzle mode");
8849 }
8850
8851    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
8852 }
8853
8854 return false;
8855}
8856
8857ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8858 SMLoc S = getLoc();
8859 int64_t Imm = 0;
8860
8861 if (trySkipId("offset")) {
8862
8863 bool Ok = false;
8864 if (skipToken(AsmToken::Colon, "expected a colon")) {
8865 if (trySkipId("swizzle")) {
8866 Ok = parseSwizzleMacro(Imm);
8867 } else {
8868 Ok = parseSwizzleOffset(Imm);
8869 }
8870 }
8871
8872 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8873
8874    return Ok ? ParseStatus::Success : ParseStatus::Failure;
8875 }
8876 return ParseStatus::NoMatch;
8877}
8878
8879bool
8880AMDGPUOperand::isSwizzle() const {
8881 return isImmTy(ImmTySwizzle);
8882}
8883
8884//===----------------------------------------------------------------------===//
8885// VGPR Index Mode
8886//===----------------------------------------------------------------------===//
8887
8888int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8889
8890 using namespace llvm::AMDGPU::VGPRIndexMode;
8891
8892 if (trySkipToken(AsmToken::RParen)) {
8893 return OFF;
8894 }
8895
8896 int64_t Imm = 0;
8897
8898 while (true) {
8899 unsigned Mode = 0;
8900 SMLoc S = getLoc();
8901
8902 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8903 if (trySkipId(IdSymbolic[ModeId])) {
8904 Mode = 1 << ModeId;
8905 break;
8906 }
8907 }
8908
8909 if (Mode == 0) {
8910 Error(S, (Imm == 0)?
8911 "expected a VGPR index mode or a closing parenthesis" :
8912 "expected a VGPR index mode");
8913 return UNDEF;
8914 }
8915
8916 if (Imm & Mode) {
8917 Error(S, "duplicate VGPR index mode");
8918 return UNDEF;
8919 }
8920 Imm |= Mode;
8921
8922 if (trySkipToken(AsmToken::RParen))
8923 break;
8924 if (!skipToken(AsmToken::Comma,
8925 "expected a comma or a closing parenthesis"))
8926 return UNDEF;
8927 }
8928
8929 return Imm;
8930}
8931
8932ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8933
8934 using namespace llvm::AMDGPU::VGPRIndexMode;
8935
8936 int64_t Imm = 0;
8937 SMLoc S = getLoc();
8938
8939 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8940 Imm = parseGPRIdxMacro();
8941 if (Imm == UNDEF)
8942 return ParseStatus::Failure;
8943 } else {
8944 if (getParser().parseAbsoluteExpression(Imm))
8945 return ParseStatus::Failure;
8946 if (Imm < 0 || !isUInt<4>(Imm))
8947 return Error(S, "invalid immediate: only 4-bit values are legal");
8948 }
8949
8950 Operands.push_back(
8951 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8952 return ParseStatus::Success;
8953}
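// A hedged illustration (example syntax, not from the original file):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
// enables the SRC0 and DST index modes; a plain immediate is also accepted
// but must fit in 4 bits.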
8954
8955bool AMDGPUOperand::isGPRIdxMode() const {
8956 return isImmTy(ImmTyGprIdxMode);
8957}
8958
8959//===----------------------------------------------------------------------===//
8960// sopp branch targets
8961//===----------------------------------------------------------------------===//
8962
8963ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8964
8965 // Make sure we are not parsing something
8966 // that looks like a label or an expression but is not.
8967 // This will improve error messages.
8968 if (isRegister() || isModifier())
8969 return ParseStatus::NoMatch;
8970
8971 if (!parseExpr(Operands))
8972 return ParseStatus::Failure;
8973
8974 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8975 assert(Opr.isImm() || Opr.isExpr());
8976 SMLoc Loc = Opr.getStartLoc();
8977
8978 // Currently we do not support arbitrary expressions as branch targets.
8979 // Only labels and absolute expressions are accepted.
8980 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8981 Error(Loc, "expected an absolute expression or a label");
8982 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8983 Error(Loc, "expected a 16-bit signed jump offset");
8984 }
8985
8986 return ParseStatus::Success;
8987}
8988
8989//===----------------------------------------------------------------------===//
8990// Boolean holding registers
8991//===----------------------------------------------------------------------===//
8992
8993ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8994 return parseReg(Operands);
8995}
8996
8997//===----------------------------------------------------------------------===//
8998// mubuf
8999//===----------------------------------------------------------------------===//
9000
9001void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9002 const OperandVector &Operands,
9003 bool IsAtomic) {
9004 OptionalImmIndexMap OptionalIdx;
9005 unsigned FirstOperandIdx = 1;
9006 bool IsAtomicReturn = false;
9007
9008 if (IsAtomic) {
9009 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
9010                     SIInstrFlags::IsAtomicRet;
9011 }
9012
9013 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9014 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9015
9016 // Add the register arguments
9017 if (Op.isReg()) {
9018 Op.addRegOperands(Inst, 1);
9019 // Insert a tied src for atomic return dst.
9020 // This cannot be postponed as subsequent calls to
9021      // addImmOperands rely on the correct number of MC operands.
9022 if (IsAtomicReturn && i == FirstOperandIdx)
9023 Op.addRegOperands(Inst, 1);
9024 continue;
9025 }
9026
9027 // Handle the case where soffset is an immediate
9028 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9029 Op.addImmOperands(Inst, 1);
9030 continue;
9031 }
9032
9033 // Handle tokens like 'offen' which are sometimes hard-coded into the
9034 // asm string. There are no MCInst operands for these.
9035 if (Op.isToken()) {
9036 continue;
9037 }
9038 assert(Op.isImm());
9039
9040 // Handle optional arguments
9041 OptionalIdx[Op.getImmTy()] = i;
9042 }
9043
9044 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9045 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9046}
9047
9048//===----------------------------------------------------------------------===//
9049// smrd
9050//===----------------------------------------------------------------------===//
9051
9052bool AMDGPUOperand::isSMRDOffset8() const {
9053 return isImmLiteral() && isUInt<8>(getImm());
9054}
9055
9056bool AMDGPUOperand::isSMEMOffset() const {
9057 // Offset range is checked later by validator.
9058 return isImmLiteral();
9059}
9060
9061bool AMDGPUOperand::isSMRDLiteralOffset() const {
9062  // 32-bit literals are only supported on CI, and we only want to use them
9063  // when the offset does not fit in 8 bits.
9064 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9065}
9066
9067//===----------------------------------------------------------------------===//
9068// vop3
9069//===----------------------------------------------------------------------===//
9070
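// The VOP3 omod (output modifier) field encodes: 0 = none, 1 = multiply by 2,
// 2 = multiply by 4, 3 = divide by 2. The helpers below convert the values
// written in asm ("mul:2", "mul:4", "div:2") into that encoding.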
9071static bool ConvertOmodMul(int64_t &Mul) {
9072 if (Mul != 1 && Mul != 2 && Mul != 4)
9073 return false;
9074
9075 Mul >>= 1;
9076 return true;
9077}
9078
9079static bool ConvertOmodDiv(int64_t &Div) {
9080 if (Div == 1) {
9081 Div = 0;
9082 return true;
9083 }
9084
9085 if (Div == 2) {
9086 Div = 3;
9087 return true;
9088 }
9089
9090 return false;
9091}
9092
9093// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9094// This is intentional and ensures compatibility with sp3.
9095// See bug 35397 for details.
9096bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9097 if (BoundCtrl == 0 || BoundCtrl == 1) {
9098 if (!isGFX11Plus())
9099 BoundCtrl = 1;
9100 return true;
9101 }
9102 return false;
9103}
9104
9105void AMDGPUAsmParser::onBeginOfFile() {
9106 if (!getParser().getStreamer().getTargetStreamer() ||
9107 getSTI().getTargetTriple().getArch() == Triple::r600)
9108 return;
9109
9110 if (!getTargetStreamer().getTargetID())
9111 getTargetStreamer().initializeTargetID(getSTI(),
9112 getSTI().getFeatureString());
9113
9114 if (isHsaAbi(getSTI()))
9115 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9116}
9117
9118/// Parse AMDGPU specific expressions.
9119///
9120/// expr ::= or(expr, ...) |
9121/// max(expr, ...)
9122///
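/// For example, max(expr1, expr2, expr3) folds any number of comma-separated
/// sub-expressions into a single AMDGPUMCExpr (operand names illustrative).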
9123bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9124 using AGVK = AMDGPUMCExpr::VariantKind;
9125
9126 if (isToken(AsmToken::Identifier)) {
9127 StringRef TokenId = getTokenStr();
9128 AGVK VK = StringSwitch<AGVK>(TokenId)
9129 .Case("max", AGVK::AGVK_Max)
9130 .Case("or", AGVK::AGVK_Or)
9131 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9132 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9133 .Case("alignto", AGVK::AGVK_AlignTo)
9134 .Case("occupancy", AGVK::AGVK_Occupancy)
9135 .Default(AGVK::AGVK_None);
9136
9137    if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9138      SmallVector<const MCExpr *, 4> Exprs;
9139      uint64_t CommaCount = 0;
9140 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9141 lex(); // Eat '('
9142 while (true) {
9143 if (trySkipToken(AsmToken::RParen)) {
9144 if (Exprs.empty()) {
9145 Error(getToken().getLoc(),
9146 "empty " + Twine(TokenId) + " expression");
9147 return true;
9148 }
9149 if (CommaCount + 1 != Exprs.size()) {
9150 Error(getToken().getLoc(),
9151 "mismatch of commas in " + Twine(TokenId) + " expression");
9152 return true;
9153 }
9154 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9155 return false;
9156 }
9157 const MCExpr *Expr;
9158 if (getParser().parseExpression(Expr, EndLoc))
9159 return true;
9160 Exprs.push_back(Expr);
9161 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9162 if (LastTokenWasComma)
9163 CommaCount++;
9164 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9165 Error(getToken().getLoc(),
9166 "unexpected token in " + Twine(TokenId) + " expression");
9167 return true;
9168 }
9169 }
9170 }
9171 }
9172 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9173}
9174
9175ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9176 StringRef Name = getTokenStr();
9177 if (Name == "mul") {
9178 return parseIntWithPrefix("mul", Operands,
9179 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9180 }
9181
9182 if (Name == "div") {
9183 return parseIntWithPrefix("div", Operands,
9184 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9185 }
9186
9187 return ParseStatus::NoMatch;
9188}
9189
9190// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9191// the number of src operands present, then copies that bit into src0_modifiers.
9192static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9193 int Opc = Inst.getOpcode();
9194 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9195 if (OpSelIdx == -1)
9196 return;
9197
9198 int SrcNum;
9199 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9200 AMDGPU::OpName::src2};
9201 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9202 ++SrcNum)
9203 ;
9204 assert(SrcNum > 0);
9205
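  // Sources occupy op_sel bits [0, SrcNum), so the destination's half-select
  // is bit SrcNum of the op_sel operand.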
9206 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9207
9208 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9209 if (DstIdx == -1)
9210 return;
9211
9212 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9213 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9214 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9215  if (DstOp.isReg() &&
9216      MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9217    if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
9218      ModVal |= SISrcMods::DST_OP_SEL;
9219 } else {
9220 if ((OpSel & (1 << SrcNum)) != 0)
9221 ModVal |= SISrcMods::DST_OP_SEL;
9222 }
9223 Inst.getOperand(ModIdx).setImm(ModVal);
9224}
9225
9226void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9227 const OperandVector &Operands) {
9228 cvtVOP3P(Inst, Operands);
9229 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9230}
9231
9232void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9233 OptionalImmIndexMap &OptionalIdx) {
9234 cvtVOP3P(Inst, Operands, OptionalIdx);
9235 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9236}
9237
9238static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9239 return
9240 // 1. This operand is input modifiers
9241 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9242 // 2. This is not last operand
9243 && Desc.NumOperands > (OpNum + 1)
9244 // 3. Next operand is register class
9245 && Desc.operands()[OpNum + 1].RegClass != -1
9246 // 4. Next register is not tied to any other operand
9247         && Desc.getOperandConstraint(OpNum + 1,
9248                                      MCOI::TIED_TO) == -1;
9249}
9250
9251void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9252 unsigned Opc = Inst.getOpcode();
9253 constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9254 AMDGPU::OpName::src2};
9255 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9256 AMDGPU::OpName::src1_modifiers,
9257 AMDGPU::OpName::src2_modifiers};
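  // op_sel bit J selects the high half of source J and is re-encoded into the
  // matching src*_modifiers operand; bit 3 (the dst half-select) is carried in
  // src0_modifiers as DST_OP_SEL.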
9258 for (int J = 0; J < 3; ++J) {
9259 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9260 if (OpIdx == -1)
9261 // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9262 // no src1. So continue instead of break.
9263 continue;
9264
9265 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9266 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9267
9268 if ((OpSel & (1 << J)) != 0)
9269 ModVal |= SISrcMods::OP_SEL_0;
9270 // op_sel[3] is encoded in src0_modifiers.
9271 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9272 ModVal |= SISrcMods::DST_OP_SEL;
9273
9274 Inst.getOperand(ModIdx).setImm(ModVal);
9275 }
9276}
9277
9278void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9279{
9280 OptionalImmIndexMap OptionalIdx;
9281 unsigned Opc = Inst.getOpcode();
9282
9283 unsigned I = 1;
9284 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9285 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9286 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9287 }
9288
9289 for (unsigned E = Operands.size(); I != E; ++I) {
9290    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9291    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9292      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9293 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9294 Op.isInterpAttrChan()) {
9295 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9296 } else if (Op.isImmModifier()) {
9297 OptionalIdx[Op.getImmTy()] = I;
9298 } else {
9299 llvm_unreachable("unhandled operand type");
9300 }
9301 }
9302
9303 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9304 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9305 AMDGPUOperand::ImmTyHigh);
9306
9307 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9308 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9309 AMDGPUOperand::ImmTyClamp);
9310
9311 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9312 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9313 AMDGPUOperand::ImmTyOModSI);
9314
9315 // Some v_interp instructions use op_sel[3] for dst.
9316 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9317 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9318 AMDGPUOperand::ImmTyOpSel);
9319 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9320 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9321
9322 cvtOpSelHelper(Inst, OpSel);
9323 }
9324}
9325
9326void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9327{
9328 OptionalImmIndexMap OptionalIdx;
9329 unsigned Opc = Inst.getOpcode();
9330
9331 unsigned I = 1;
9332 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9333 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9334 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9335 }
9336
9337 for (unsigned E = Operands.size(); I != E; ++I) {
9338    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9339    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9340      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9341 } else if (Op.isImmModifier()) {
9342 OptionalIdx[Op.getImmTy()] = I;
9343 } else {
9344 llvm_unreachable("unhandled operand type");
9345 }
9346 }
9347
9348 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9349
9350 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9351 if (OpSelIdx != -1)
9352 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9353
9354 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9355
9356 if (OpSelIdx == -1)
9357 return;
9358
9359 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9360 cvtOpSelHelper(Inst, OpSel);
9361}
9362
9363void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9364 const OperandVector &Operands) {
9365 OptionalImmIndexMap OptionalIdx;
9366 unsigned Opc = Inst.getOpcode();
9367 unsigned I = 1;
9368 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9369
9370 const MCInstrDesc &Desc = MII.get(Opc);
9371
9372 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9373 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9374
9375 for (unsigned E = Operands.size(); I != E; ++I) {
9376 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9377 int NumOperands = Inst.getNumOperands();
9378    // The order of operands in the MCInst and in the parsed operand list
9379    // differs, so add dummy cbsz and blgp operands at their MCInst operand
9380    // indices so that the scale values that follow are placed correctly.
9381    if (NumOperands == CbszOpIdx) {
9382      Inst.addOperand(MCOperand::createImm(0)); // dummy cbsz
9383      Inst.addOperand(MCOperand::createImm(0)); // dummy blgp
9384    }
9385 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9386 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9387 } else if (Op.isImmModifier()) {
9388 OptionalIdx[Op.getImmTy()] = I;
9389 } else {
9390 Op.addRegOrImmOperands(Inst, 1);
9391 }
9392 }
9393
9394 // Insert CBSZ and BLGP operands for F8F6F4 variants
9395 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9396 if (CbszIdx != OptionalIdx.end()) {
9397 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9398 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9399 }
9400
9401 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9402 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9403 if (BlgpIdx != OptionalIdx.end()) {
9404 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9405 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9406 }
9407
9408  // Add dummy src_modifiers
9409  Inst.addOperand(MCOperand::createImm(0));
9410  Inst.addOperand(MCOperand::createImm(0));
9411
9412 // Handle op_sel fields
9413
9414 unsigned OpSel = 0;
9415 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9416 if (OpselIdx != OptionalIdx.end()) {
9417 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9418 .getImm();
9419 }
9420
9421 unsigned OpSelHi = 0;
9422 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9423 if (OpselHiIdx != OptionalIdx.end()) {
9424 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9425 .getImm();
9426 }
9427 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9428 AMDGPU::OpName::src1_modifiers};
9429
9430 for (unsigned J = 0; J < 2; ++J) {
9431 unsigned ModVal = 0;
9432 if (OpSel & (1 << J))
9433 ModVal |= SISrcMods::OP_SEL_0;
9434 if (OpSelHi & (1 << J))
9435 ModVal |= SISrcMods::OP_SEL_1;
9436
9437 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9438 Inst.getOperand(ModIdx).setImm(ModVal);
9439 }
9440}
9441
9442void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9443 OptionalImmIndexMap &OptionalIdx) {
9444 unsigned Opc = Inst.getOpcode();
9445
9446 unsigned I = 1;
9447 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9448 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9449 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9450 }
9451
9452 for (unsigned E = Operands.size(); I != E; ++I) {
9453    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9454    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9455      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9456 } else if (Op.isImmModifier()) {
9457 OptionalIdx[Op.getImmTy()] = I;
9458 } else {
9459 Op.addRegOrImmOperands(Inst, 1);
9460 }
9461 }
9462
9463 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9464 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9465 AMDGPUOperand::ImmTyScaleSel);
9466
9467 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9468 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9469 AMDGPUOperand::ImmTyClamp);
9470
9471 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9472 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9473 Inst.addOperand(Inst.getOperand(0));
9474 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9475 AMDGPUOperand::ImmTyByteSel);
9476 }
9477
9478 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9479 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9480 AMDGPUOperand::ImmTyOModSI);
9481
9482 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9483 // it has src2 register operand that is tied to dst operand
9484 // we don't allow modifiers for this operand in assembler so src2_modifiers
9485 // should be 0.
9486 if (isMAC(Opc)) {
9487 auto *it = Inst.begin();
9488 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9489 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9490 ++it;
9491 // Copy the operand to ensure it's not invalidated when Inst grows.
9492 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9493 }
9494}
9495
9496void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9497 OptionalImmIndexMap OptionalIdx;
9498 cvtVOP3(Inst, Operands, OptionalIdx);
9499}
9500
9501void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9502 OptionalImmIndexMap &OptIdx) {
9503 const int Opc = Inst.getOpcode();
9504 const MCInstrDesc &Desc = MII.get(Opc);
9505
9506 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9507
9508 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9509 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9510 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9511 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9512 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9513 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9514 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9515 Inst.addOperand(Inst.getOperand(0));
9516 }
9517
9518 // Adding vdst_in operand is already covered for these DPP instructions in
9519 // cvtVOP3DPP.
9520 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
9521 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
9522 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
9523 Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
9524 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
9525 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
9526 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
9527 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
9528 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
9529 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
9530 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9531 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
9532 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
9533 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9534 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9535 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
9536 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
9537 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
9538 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
9539 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
9540 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
9541 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
9542 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
9543 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
9544 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
9545 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
9546 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
9547 Inst.addOperand(Inst.getOperand(0));
9548 }
9549
9550 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9551 if (BitOp3Idx != -1) {
9552 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9553 }
9554
9555 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9556 // instruction, and then figure out where to actually put the modifiers
9557
9558 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9559 if (OpSelIdx != -1) {
9560 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9561 }
9562
9563 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9564 if (OpSelHiIdx != -1) {
9565 int DefaultVal = IsPacked ? -1 : 0;
9566 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9567 DefaultVal);
9568 }
9569
9570 int MatrixAFMTIdx =
9571 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9572 if (MatrixAFMTIdx != -1) {
9573 addOptionalImmOperand(Inst, Operands, OptIdx,
9574 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9575 }
9576
9577 int MatrixBFMTIdx =
9578 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9579 if (MatrixBFMTIdx != -1) {
9580 addOptionalImmOperand(Inst, Operands, OptIdx,
9581 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9582 }
9583
9584 int MatrixAScaleIdx =
9585 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9586 if (MatrixAScaleIdx != -1) {
9587 addOptionalImmOperand(Inst, Operands, OptIdx,
9588 AMDGPUOperand::ImmTyMatrixAScale, 0);
9589 }
9590
9591 int MatrixBScaleIdx =
9592 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9593 if (MatrixBScaleIdx != -1) {
9594 addOptionalImmOperand(Inst, Operands, OptIdx,
9595 AMDGPUOperand::ImmTyMatrixBScale, 0);
9596 }
9597
9598 int MatrixAScaleFmtIdx =
9599 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9600 if (MatrixAScaleFmtIdx != -1) {
9601 addOptionalImmOperand(Inst, Operands, OptIdx,
9602 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9603 }
9604
9605 int MatrixBScaleFmtIdx =
9606 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9607 if (MatrixBScaleFmtIdx != -1) {
9608 addOptionalImmOperand(Inst, Operands, OptIdx,
9609 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9610 }
9611
9612 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9613 addOptionalImmOperand(Inst, Operands, OptIdx,
9614 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9615
9616 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9617 addOptionalImmOperand(Inst, Operands, OptIdx,
9618 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9619
9620 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9621 if (NegLoIdx != -1)
9622 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9623
9624 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9625 if (NegHiIdx != -1)
9626 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9627
9628 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9629 AMDGPU::OpName::src2};
9630 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9631 AMDGPU::OpName::src1_modifiers,
9632 AMDGPU::OpName::src2_modifiers};
9633
9634 unsigned OpSel = 0;
9635 unsigned OpSelHi = 0;
9636 unsigned NegLo = 0;
9637 unsigned NegHi = 0;
9638
9639 if (OpSelIdx != -1)
9640 OpSel = Inst.getOperand(OpSelIdx).getImm();
9641
9642 if (OpSelHiIdx != -1)
9643 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9644
9645 if (NegLoIdx != -1)
9646 NegLo = Inst.getOperand(NegLoIdx).getImm();
9647
9648 if (NegHiIdx != -1)
9649 NegHi = Inst.getOperand(NegHiIdx).getImm();
9650
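  // Fold the op_sel / op_sel_hi / neg_lo / neg_hi immediates into the
  // per-source modifier operands below. For VGPR_16 sources the hi/lo half is
  // taken from the register itself rather than from op_sel.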
9651 for (int J = 0; J < 3; ++J) {
9652 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9653 if (OpIdx == -1)
9654 break;
9655
9656 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9657
9658 if (ModIdx == -1)
9659 continue;
9660
9661 uint32_t ModVal = 0;
9662
9663 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9664 if (SrcOp.isReg() && getMRI()
9665 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9666 .contains(SrcOp.getReg())) {
9667 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9668 if (VGPRSuffixIsHi)
9669 ModVal |= SISrcMods::OP_SEL_0;
9670 } else {
9671 if ((OpSel & (1 << J)) != 0)
9672 ModVal |= SISrcMods::OP_SEL_0;
9673 }
9674
9675 if ((OpSelHi & (1 << J)) != 0)
9676 ModVal |= SISrcMods::OP_SEL_1;
9677
9678 if ((NegLo & (1 << J)) != 0)
9679 ModVal |= SISrcMods::NEG;
9680
9681 if ((NegHi & (1 << J)) != 0)
9682 ModVal |= SISrcMods::NEG_HI;
9683
9684 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9685 }
9686}
9687
9688void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9689 OptionalImmIndexMap OptIdx;
9690 cvtVOP3(Inst, Operands, OptIdx);
9691 cvtVOP3P(Inst, Operands, OptIdx);
9692}
9693
9694static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
9695                                  unsigned i, unsigned Opc,
9696                                  AMDGPU::OpName OpName) {
9697 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9698 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9699 else
9700 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9701}
9702
9703void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9704 unsigned Opc = Inst.getOpcode();
9705
9706 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9707 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9708 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9709 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9710 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9711
9712 OptionalImmIndexMap OptIdx;
9713 for (unsigned i = 5; i < Operands.size(); ++i) {
9714 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9715 OptIdx[Op.getImmTy()] = i;
9716 }
9717
9718 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9719 addOptionalImmOperand(Inst, Operands, OptIdx,
9720 AMDGPUOperand::ImmTyIndexKey8bit);
9721
9722 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9723 addOptionalImmOperand(Inst, Operands, OptIdx,
9724 AMDGPUOperand::ImmTyIndexKey16bit);
9725
9726 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9727 addOptionalImmOperand(Inst, Operands, OptIdx,
9728 AMDGPUOperand::ImmTyIndexKey32bit);
9729
9730 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9731 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9732
9733 cvtVOP3P(Inst, Operands, OptIdx);
9734}
9735
9736//===----------------------------------------------------------------------===//
9737// VOPD
9738//===----------------------------------------------------------------------===//
9739
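// VOPD instructions pair two components separated by "::", for example
// "v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4" (illustrative).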
9740ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9741 if (!hasVOPD(getSTI()))
9742 return ParseStatus::NoMatch;
9743
9744 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9745 SMLoc S = getLoc();
9746 lex();
9747 lex();
9748 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9749 SMLoc OpYLoc = getLoc();
9750 StringRef OpYName;
9751 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9752 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9753 return ParseStatus::Success;
9754 }
9755 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9756 }
9757 return ParseStatus::NoMatch;
9758}
9759
9760// Create VOPD MCInst operands using parsed assembler operands.
9761void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9762 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9763
9764 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9765    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9766    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9767      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9768 return;
9769 }
9770 if (Op.isReg()) {
9771 Op.addRegOperands(Inst, 1);
9772 return;
9773 }
9774 if (Op.isImm()) {
9775 Op.addImmOperands(Inst, 1);
9776 return;
9777 }
9778 llvm_unreachable("Unhandled operand type in cvtVOPD");
9779 };
9780
9781 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9782
9783 // MCInst operands are ordered as follows:
9784 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9785
9786 for (auto CompIdx : VOPD::COMPONENTS) {
9787 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9788 }
9789
9790 for (auto CompIdx : VOPD::COMPONENTS) {
9791 const auto &CInfo = InstInfo[CompIdx];
9792 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9793 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9794 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9795 if (CInfo.hasSrc2Acc())
9796 addOp(CInfo.getIndexOfDstInParsedOperands());
9797 }
9798
9799 int BitOp3Idx =
9800 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9801 if (BitOp3Idx != -1) {
9802 OptionalImmIndexMap OptIdx;
9803 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9804 if (Op.isImm())
9805 OptIdx[Op.getImmTy()] = Operands.size() - 1;
9806
9807 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9808 }
9809}
9810
9811//===----------------------------------------------------------------------===//
9812// dpp
9813//===----------------------------------------------------------------------===//
9814
9815bool AMDGPUOperand::isDPP8() const {
9816 return isImmTy(ImmTyDPP8);
9817}
9818
9819bool AMDGPUOperand::isDPPCtrl() const {
9820 using namespace AMDGPU::DPP;
9821
9822 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9823 if (result) {
9824 int64_t Imm = getImm();
9825 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9826 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9827 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9828 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9829 (Imm == DppCtrl::WAVE_SHL1) ||
9830 (Imm == DppCtrl::WAVE_ROL1) ||
9831 (Imm == DppCtrl::WAVE_SHR1) ||
9832 (Imm == DppCtrl::WAVE_ROR1) ||
9833 (Imm == DppCtrl::ROW_MIRROR) ||
9834 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9835 (Imm == DppCtrl::BCAST15) ||
9836 (Imm == DppCtrl::BCAST31) ||
9837 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9838 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9839 }
9840 return false;
9841}
9842
9843//===----------------------------------------------------------------------===//
9844// mAI
9845//===----------------------------------------------------------------------===//
9846
9847bool AMDGPUOperand::isBLGP() const {
9848 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9849}
9850
9851bool AMDGPUOperand::isS16Imm() const {
9852 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9853}
9854
9855bool AMDGPUOperand::isU16Imm() const {
9856 return isImmLiteral() && isUInt<16>(getImm());
9857}
9858
9859//===----------------------------------------------------------------------===//
9860// dim
9861//===----------------------------------------------------------------------===//
9862
9863bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9864 // We want to allow "dim:1D" etc.,
9865 // but the initial 1 is tokenized as an integer.
9866 std::string Token;
9867 if (isToken(AsmToken::Integer)) {
9868 SMLoc Loc = getToken().getEndLoc();
9869 Token = std::string(getTokenStr());
9870 lex();
9871 if (getLoc() != Loc)
9872 return false;
9873 }
9874
9875 StringRef Suffix;
9876 if (!parseId(Suffix))
9877 return false;
9878 Token += Suffix;
9879
9880 StringRef DimId = Token;
9881 DimId.consume_front("SQ_RSRC_IMG_");
9882
9883 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9884 if (!DimInfo)
9885 return false;
9886
9887 Encoding = DimInfo->Encoding;
9888 return true;
9889}
9890
9891ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9892 if (!isGFX10Plus())
9893 return ParseStatus::NoMatch;
9894
9895 SMLoc S = getLoc();
9896
9897 if (!trySkipId("dim", AsmToken::Colon))
9898 return ParseStatus::NoMatch;
9899
9900 unsigned Encoding;
9901 SMLoc Loc = getLoc();
9902 if (!parseDimId(Encoding))
9903 return Error(Loc, "invalid dim value");
9904
9905 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9906 AMDGPUOperand::ImmTyDim));
9907 return ParseStatus::Success;
9908}
9909
9910//===----------------------------------------------------------------------===//
9911// dpp
9912//===----------------------------------------------------------------------===//
9913
9914ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9915 SMLoc S = getLoc();
9916
9917 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9918 return ParseStatus::NoMatch;
9919
9920 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9921
9922 int64_t Sels[8];
9923
9924 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9925 return ParseStatus::Failure;
9926
9927 for (size_t i = 0; i < 8; ++i) {
9928 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9929 return ParseStatus::Failure;
9930
9931 SMLoc Loc = getLoc();
9932 if (getParser().parseAbsoluteExpression(Sels[i]))
9933 return ParseStatus::Failure;
9934 if (0 > Sels[i] || 7 < Sels[i])
9935 return Error(Loc, "expected a 3-bit value");
9936 }
9937
9938 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9939 return ParseStatus::Failure;
9940
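  // Pack the eight 3-bit lane selects into a single immediate, lane 0 in the
  // lowest bits.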
9941 unsigned DPP8 = 0;
9942 for (size_t i = 0; i < 8; ++i)
9943 DPP8 |= (Sels[i] << (i * 3));
9944
9945 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9946 return ParseStatus::Success;
9947}
9948
9949bool
9950AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9951 const OperandVector &Operands) {
9952 if (Ctrl == "row_newbcast")
9953 return isGFX90A();
9954
9955 if (Ctrl == "row_share" ||
9956 Ctrl == "row_xmask")
9957 return isGFX10Plus();
9958
9959 if (Ctrl == "wave_shl" ||
9960 Ctrl == "wave_shr" ||
9961 Ctrl == "wave_rol" ||
9962 Ctrl == "wave_ror" ||
9963 Ctrl == "row_bcast")
9964 return isVI() || isGFX9();
9965
9966 return Ctrl == "row_mirror" ||
9967 Ctrl == "row_half_mirror" ||
9968 Ctrl == "quad_perm" ||
9969 Ctrl == "row_shl" ||
9970 Ctrl == "row_shr" ||
9971 Ctrl == "row_ror";
9972}
9973
9974int64_t
9975AMDGPUAsmParser::parseDPPCtrlPerm() {
9976 // quad_perm:[%d,%d,%d,%d]
9977
9978 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9979 return -1;
9980
9981 int64_t Val = 0;
9982 for (int i = 0; i < 4; ++i) {
9983 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9984 return -1;
9985
9986 int64_t Temp;
9987 SMLoc Loc = getLoc();
9988 if (getParser().parseAbsoluteExpression(Temp))
9989 return -1;
9990 if (Temp < 0 || Temp > 3) {
9991 Error(Loc, "expected a 2-bit value");
9992 return -1;
9993 }
9994
9995 Val += (Temp << i * 2);
9996 }
9997
9998 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9999 return -1;
10000
10001 return Val;
10002}
10003
10004int64_t
10005AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10006 using namespace AMDGPU::DPP;
10007
10008 // sel:%d
10009
10010 int64_t Val;
10011 SMLoc Loc = getLoc();
10012
10013 if (getParser().parseAbsoluteExpression(Val))
10014 return -1;
10015
10016 struct DppCtrlCheck {
10017 int64_t Ctrl;
10018 int Lo;
10019 int Hi;
10020 };
10021
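  // Each entry gives the base encoding and the legal value range: fixed-value
  // controls (Lo == Hi) use the base encoding as is, while ranged controls
  // OR the parsed value into the base.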
10022 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10023 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10024 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10025 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10026 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10027 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10028 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10029 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10030 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10031 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10032 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10033 .Default({-1, 0, 0});
10034
10035 bool Valid;
10036 if (Check.Ctrl == -1) {
10037 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10038 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10039 } else {
10040 Valid = Check.Lo <= Val && Val <= Check.Hi;
10041 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10042 }
10043
10044 if (!Valid) {
10045 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10046 return -1;
10047 }
10048
10049 return Val;
10050}
10051
10052ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10053 using namespace AMDGPU::DPP;
10054
10055 if (!isToken(AsmToken::Identifier) ||
10056 !isSupportedDPPCtrl(getTokenStr(), Operands))
10057 return ParseStatus::NoMatch;
10058
10059 SMLoc S = getLoc();
10060 int64_t Val = -1;
10061 StringRef Ctrl;
10062
10063 parseId(Ctrl);
10064
10065 if (Ctrl == "row_mirror") {
10066 Val = DppCtrl::ROW_MIRROR;
10067 } else if (Ctrl == "row_half_mirror") {
10068 Val = DppCtrl::ROW_HALF_MIRROR;
10069 } else {
10070 if (skipToken(AsmToken::Colon, "expected a colon")) {
10071 if (Ctrl == "quad_perm") {
10072 Val = parseDPPCtrlPerm();
10073 } else {
10074 Val = parseDPPCtrlSel(Ctrl);
10075 }
10076 }
10077 }
10078
10079 if (Val == -1)
10080 return ParseStatus::Failure;
10081
10082 Operands.push_back(
10083 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10084 return ParseStatus::Success;
10085}
10086
10087void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10088 bool IsDPP8) {
10089 OptionalImmIndexMap OptionalIdx;
10090 unsigned Opc = Inst.getOpcode();
10091 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10092
10093  // MAC instructions are special because they have an 'old'
10094  // operand which is not tied to dst (but is assumed to be).
10095  // They also have a dummy, unused src2_modifiers operand.
10096 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10097 int Src2ModIdx =
10098 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10099 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10100 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10101
10102 unsigned I = 1;
10103 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10104 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10105 }
10106
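  // A dpp8 'fi' operand, if present, is remembered in Fi and emitted as a
  // separate MCInst operand after the main operand loop.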
10107 int Fi = 0;
10108 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10109 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10110 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10111 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10112 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
10113
10114 for (unsigned E = Operands.size(); I != E; ++I) {
10115
10116 if (IsMAC) {
10117 int NumOperands = Inst.getNumOperands();
10118 if (OldIdx == NumOperands) {
10119 // Handle old operand
10120 constexpr int DST_IDX = 0;
10121 Inst.addOperand(Inst.getOperand(DST_IDX));
10122 } else if (Src2ModIdx == NumOperands) {
10123        // Add unused dummy src2_modifiers
10124        Inst.addOperand(MCOperand::createImm(0));
10125      }
10126 }
10127
10128 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10129 Inst.addOperand(Inst.getOperand(0));
10130 }
10131
10132 if (IsVOP3CvtSrDpp) {
10133      if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10134        Inst.addOperand(MCOperand::createImm(0)); // dummy src2_modifiers
10135        Inst.addOperand(MCOperand::createReg(MCRegister()));
10136 }
10137 }
10138
10139    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10140                                            MCOI::TIED_TO);
10141    if (TiedTo != -1) {
10142 assert((unsigned)TiedTo < Inst.getNumOperands());
10143 // handle tied old or src2 for MAC instructions
10144 Inst.addOperand(Inst.getOperand(TiedTo));
10145 }
10146 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10147 // Add the register arguments
10148 if (IsDPP8 && Op.isDppFI()) {
10149 Fi = Op.getImm();
10150 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10151 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10152 } else if (Op.isReg()) {
10153 Op.addRegOperands(Inst, 1);
10154 } else if (Op.isImm() &&
10155 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10156 Op.addImmOperands(Inst, 1);
10157 } else if (Op.isImm()) {
10158 OptionalIdx[Op.getImmTy()] = I;
10159 } else {
10160 llvm_unreachable("unhandled operand type");
10161 }
10162 }
10163
10164 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10165 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10166 AMDGPUOperand::ImmTyClamp);
10167
10168 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10169 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10170 Inst.addOperand(Inst.getOperand(0));
10171 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10172 AMDGPUOperand::ImmTyByteSel);
10173 }
10174
10175 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10176 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10177
10178 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10179 cvtVOP3P(Inst, Operands, OptionalIdx);
10180 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10181 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10182 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10183 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10184 }
10185
10186 if (IsDPP8) {
10187 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10188 using namespace llvm::AMDGPU::DPP;
10189 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10190 } else {
10191 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10192 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10193 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10194 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10195
10196 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10197 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10198 AMDGPUOperand::ImmTyDppFI);
10199 }
10200}
10201
10202void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10203 OptionalImmIndexMap OptionalIdx;
10204
10205 unsigned I = 1;
10206 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10207 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10208 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10209 }
10210
10211 int Fi = 0;
10212 for (unsigned E = Operands.size(); I != E; ++I) {
10213    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10214                                            MCOI::TIED_TO);
10215    if (TiedTo != -1) {
10216 assert((unsigned)TiedTo < Inst.getNumOperands());
10217 // handle tied old or src2 for MAC instructions
10218 Inst.addOperand(Inst.getOperand(TiedTo));
10219 }
10220 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10221 // Add the register arguments
10222 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10223 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10224 // Skip it.
10225 continue;
10226 }
10227
10228 if (IsDPP8) {
10229 if (Op.isDPP8()) {
10230 Op.addImmOperands(Inst, 1);
10231 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10232 Op.addRegWithFPInputModsOperands(Inst, 2);
10233 } else if (Op.isDppFI()) {
10234 Fi = Op.getImm();
10235 } else if (Op.isReg()) {
10236 Op.addRegOperands(Inst, 1);
10237 } else {
10238 llvm_unreachable("Invalid operand type");
10239 }
10240    } else {
10241      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10242        Op.addRegWithFPInputModsOperands(Inst, 2);
10243 } else if (Op.isReg()) {
10244 Op.addRegOperands(Inst, 1);
10245 } else if (Op.isDPPCtrl()) {
10246 Op.addImmOperands(Inst, 1);
10247 } else if (Op.isImm()) {
10248 // Handle optional arguments
10249 OptionalIdx[Op.getImmTy()] = I;
10250 } else {
10251 llvm_unreachable("Invalid operand type");
10252 }
10253 }
10254 }
10255
10256 if (IsDPP8) {
10257 using namespace llvm::AMDGPU::DPP;
10258 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10259 } else {
10260 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10261 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10262 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10263 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10264 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10265 AMDGPUOperand::ImmTyDppFI);
10266 }
10267 }
10268}
10269
10270//===----------------------------------------------------------------------===//
10271// sdwa
10272//===----------------------------------------------------------------------===//
10273
10274ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10275 StringRef Prefix,
10276 AMDGPUOperand::ImmTy Type) {
10277 return parseStringOrIntWithPrefix(
10278 Operands, Prefix,
10279 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10280 Type);
10281}
10282
10283ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10284 return parseStringOrIntWithPrefix(
10285 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10286 AMDGPUOperand::ImmTySDWADstUnused);
10287}
10288
10289void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10290 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
10291}
10292
10293void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10294 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
10295}
10296
10297void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10298 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
10299}
10300
10301void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10302 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
10303}
10304
10305void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10306 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
10307}
10308
10309void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10310 uint64_t BasicInstType,
10311 bool SkipDstVcc,
10312 bool SkipSrcVcc) {
10313 using namespace llvm::AMDGPU::SDWA;
10314
10315 OptionalImmIndexMap OptionalIdx;
10316 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10317 bool SkippedVcc = false;
10318
10319 unsigned I = 1;
10320 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10321 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10322 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10323 }
10324
10325 for (unsigned E = Operands.size(); I != E; ++I) {
10326 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10327 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10328 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10329 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10330 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10331 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10332 // Skip VCC only if we didn't skip it on previous iteration.
10333 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10334 if (BasicInstType == SIInstrFlags::VOP2 &&
10335 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10336 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10337 SkippedVcc = true;
10338 continue;
10339 }
10340 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10341 SkippedVcc = true;
10342 continue;
10343 }
10344    }
10345    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10346      Op.addRegOrImmWithInputModsOperands(Inst, 2);
10347 } else if (Op.isImm()) {
10348 // Handle optional arguments
10349 OptionalIdx[Op.getImmTy()] = I;
10350 } else {
10351 llvm_unreachable("Invalid operand type");
10352 }
10353 SkippedVcc = false;
10354 }
10355
10356 const unsigned Opc = Inst.getOpcode();
10357 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10358 Opc != AMDGPU::V_NOP_sdwa_vi) {
10359    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
10360 switch (BasicInstType) {
10361 case SIInstrFlags::VOP1:
10362 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10363 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10364 AMDGPUOperand::ImmTyClamp, 0);
10365
10366 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10367 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10368 AMDGPUOperand::ImmTyOModSI, 0);
10369
10370 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10371 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10372 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10373
10374 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10375 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10376 AMDGPUOperand::ImmTySDWADstUnused,
10377 DstUnused::UNUSED_PRESERVE);
10378
10379 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10380 break;
10381
10382 case SIInstrFlags::VOP2:
10383 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10384 AMDGPUOperand::ImmTyClamp, 0);
10385
10386 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10387 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10388
10389 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10390 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10391 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10392 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10393 break;
10394
10395 case SIInstrFlags::VOPC:
10396 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10397 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10398 AMDGPUOperand::ImmTyClamp, 0);
10399 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10400 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10401 break;
10402
10403 default:
10404 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10405 }
10406 }
10407
10408 // special case v_mac_{f16, f32}:
10409 // it has src2 register operand that is tied to dst operand
10410 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10411 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10412 auto *it = Inst.begin();
10413 std::advance(
10414 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10415 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10416 }
10417}
10418
10419/// Force static initialization.
10420extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10421LLVMInitializeAMDGPUAsmParser() {
10422  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
10423  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
10424}
10425
10426#define GET_MATCHER_IMPLEMENTATION
10427#define GET_MNEMONIC_SPELL_CHECKER
10428#define GET_MNEMONIC_CHECKER
10429#include "AMDGPUGenAsmMatcher.inc"
10430
10431ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10432 unsigned MCK) {
10433 switch (MCK) {
10434 case MCK_addr64:
10435 return parseTokenOp("addr64", Operands);
10436 case MCK_done:
10437 return parseTokenOp("done", Operands);
10438 case MCK_idxen:
10439 return parseTokenOp("idxen", Operands);
10440 case MCK_lds:
10441 return parseTokenOp("lds", Operands);
10442 case MCK_offen:
10443 return parseTokenOp("offen", Operands);
10444 case MCK_off:
10445 return parseTokenOp("off", Operands);
10446 case MCK_row_95_en:
10447 return parseTokenOp("row_en", Operands);
10448 case MCK_gds:
10449 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10450 case MCK_tfe:
10451 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10452 }
10453 return tryCustomParseOperand(Operands, MCK);
10454}
10455
10456// This function should be defined after the auto-generated include so that
10457// the MatchClassKind enum is defined.
10458unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10459 unsigned Kind) {
10460  // Tokens like "glc" would be parsed as immediate operands in ParseOperand(),
10461  // but MatchInstructionImpl() expects a token and fails to validate the
10462  // operand. This method checks whether we were given an immediate operand
10463  // where the corresponding token is expected.
10464 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10465 switch (Kind) {
10466 case MCK_addr64:
10467 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10468 case MCK_gds:
10469 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10470 case MCK_lds:
10471 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10472 case MCK_idxen:
10473 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10474 case MCK_offen:
10475 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10476 case MCK_tfe:
10477 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10478 case MCK_SSrc_b32:
10479    // When operands have expression values, they return true for isToken,
10480    // because a token and an expression cannot be distinguished at parse
10481    // time. MatchInstructionImpl() always tries to match an operand as a
10482    // token when isToken returns true, and if the name of the expression is
10483    // not a valid token the match fails, so we need to handle it here.
10485 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10486 case MCK_SSrc_f32:
10487 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10488 case MCK_SOPPBrTarget:
10489 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10490 case MCK_VReg32OrOff:
10491 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10492 case MCK_InterpSlot:
10493 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10494 case MCK_InterpAttr:
10495 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10496 case MCK_InterpAttrChan:
10497 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10498 case MCK_SReg_64:
10499 case MCK_SReg_64_XEXEC:
10500 // Null is defined as a 32-bit register but
10501 // it should also be enabled with 64-bit operands or larger.
10502 // The following code enables it for SReg_64 and larger operands
10503 // used as source and destination. Remaining source
10504 // operands are handled in isInlinableImm.
10505 case MCK_SReg_96:
10506 case MCK_SReg_128:
10507 case MCK_SReg_256:
10508 case MCK_SReg_512:
10509 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10510 default:
10511 return Match_InvalidOperand;
10512 }
10513}
10514
10515//===----------------------------------------------------------------------===//
10516// endpgm
10517//===----------------------------------------------------------------------===//
10518
10519ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10520 SMLoc S = getLoc();
10521 int64_t Imm = 0;
10522
10523 if (!parseExpr(Imm)) {
10524 // The operand is optional, if not present default to 0
10525 Imm = 0;
10526 }
10527
10528 if (!isUInt<16>(Imm))
10529 return Error(S, "expected a 16-bit value");
10530
10531 Operands.push_back(
10532 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10533 return ParseStatus::Success;
10534}
10535
10536bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10537
10538//===----------------------------------------------------------------------===//
10539// Split Barrier
10540//===----------------------------------------------------------------------===//
10541
10542bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6057
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
const MCExpr * getExpr() const
Definition MCInst.h:118
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
constexpr bool isValid() const
Definition MCRegister.h:76
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:95
Represents a location in source code.
Definition SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:36
constexpr const char * getPointer() const
Definition SMLoc.h:34
constexpr bool isValid() const
Definition SMLoc.h:29
SMLoc Start
Definition SMLoc.h:50
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:854
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:657
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:573
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:611
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:637
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:273
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:231
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:202
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:209
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:219
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:224
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:206
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:201
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:218
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:216
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:205
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:236
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:237
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:212
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:204
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:217
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:213
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:238
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:220
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:203
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:228
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1425
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:62
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:570
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1113
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:252
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:267
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:331
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:159
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:164
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:405
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1760
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
#define N
RegisterKind Kind
StringLiteral Name
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
static LLVM_ABI const fltSemantics & IEEEhalf() LLVM_READNONE
Definition APFloat.cpp:264
static LLVM_ABI const fltSemantics & BFloat() LLVM_READNONE
Definition APFloat.cpp:265
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...