AMDGPUAsmParser.cpp
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
56enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
62class AMDGPUOperand : public MCParsedAsmOperand {
63 enum KindTy {
64 Token,
65 Immediate,
66 Register,
67 Expression
68 } Kind;
69
70 SMLoc StartLoc, EndLoc;
71 const AMDGPUAsmParser *AsmParser;
72
73public:
74 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
75 : Kind(Kind_), AsmParser(AsmParser_) {}
76
77 using Ptr = std::unique_ptr<AMDGPUOperand>;
78
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 LitModifier Lit = LitModifier::None;
84
85 bool hasFPModifiers() const { return Abs || Neg; }
86 bool hasIntModifiers() const { return Sext; }
87 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
88
89 int64_t getFPModifiersOperand() const {
90 int64_t Operand = 0;
91 Operand |= Abs ? SISrcMods::ABS : 0u;
92 Operand |= Neg ? SISrcMods::NEG : 0u;
93 return Operand;
94 }
95
96 int64_t getIntModifiersOperand() const {
97 int64_t Operand = 0;
98 Operand |= Sext ? SISrcMods::SEXT : 0u;
99 return Operand;
100 }
101
102 int64_t getModifiersOperand() const {
103 assert(!(hasFPModifiers() && hasIntModifiers())
104 && "fp and int modifiers should not be used simultaneously");
105 if (hasFPModifiers())
106 return getFPModifiersOperand();
107 if (hasIntModifiers())
108 return getIntModifiersOperand();
109 return 0;
110 }
111
112 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
113 };
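
// Illustrative sketch of how the modifiers above fold into the single
// "src_modifiers" immediate placed in front of a VOP3 source operand
// (see addRegOrImmWithInputModsOperands() below):
//   AMDGPUOperand::Modifiers Mods;
//   Mods.Abs = true;                          // parsed "|v0|" or "abs(v0)"
//   Mods.Neg = true;                          // parsed "-|v0|"
//   int64_t SrcMods = Mods.getModifiersOperand();
//   // SrcMods == (SISrcMods::NEG | SISrcMods::ABS). Setting Sext instead
//   // yields SISrcMods::SEXT; FP and integer modifiers cannot be combined,
//   // which is what the assert in getModifiersOperand() enforces.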
114
115 enum ImmTy {
116 ImmTyNone,
117 ImmTyGDS,
118 ImmTyLDS,
119 ImmTyOffen,
120 ImmTyIdxen,
121 ImmTyAddr64,
122 ImmTyOffset,
123 ImmTyInstOffset,
124 ImmTyOffset0,
125 ImmTyOffset1,
126 ImmTySMEMOffsetMod,
127 ImmTyCPol,
128 ImmTyTFE,
129 ImmTyD16,
130 ImmTyClamp,
131 ImmTyOModSI,
132 ImmTySDWADstSel,
133 ImmTySDWASrc0Sel,
134 ImmTySDWASrc1Sel,
135 ImmTySDWADstUnused,
136 ImmTyDMask,
137 ImmTyDim,
138 ImmTyUNorm,
139 ImmTyDA,
140 ImmTyR128A16,
141 ImmTyA16,
142 ImmTyLWE,
143 ImmTyExpTgt,
144 ImmTyExpCompr,
145 ImmTyExpVM,
146 ImmTyFORMAT,
147 ImmTyHwreg,
148 ImmTyOff,
149 ImmTySendMsg,
150 ImmTyInterpSlot,
151 ImmTyInterpAttr,
152 ImmTyInterpAttrChan,
153 ImmTyOpSel,
154 ImmTyOpSelHi,
155 ImmTyNegLo,
156 ImmTyNegHi,
157 ImmTyIndexKey8bit,
158 ImmTyIndexKey16bit,
159 ImmTyIndexKey32bit,
160 ImmTyDPP8,
161 ImmTyDppCtrl,
162 ImmTyDppRowMask,
163 ImmTyDppBankMask,
164 ImmTyDppBoundCtrl,
165 ImmTyDppFI,
166 ImmTySwizzle,
167 ImmTyGprIdxMode,
168 ImmTyHigh,
169 ImmTyBLGP,
170 ImmTyCBSZ,
171 ImmTyABID,
172 ImmTyEndpgm,
173 ImmTyWaitVDST,
174 ImmTyWaitEXP,
175 ImmTyWaitVAVDst,
176 ImmTyWaitVMVSrc,
177 ImmTyBitOp3,
178 ImmTyMatrixAFMT,
179 ImmTyMatrixBFMT,
180 ImmTyMatrixAScale,
181 ImmTyMatrixBScale,
182 ImmTyMatrixAScaleFmt,
183 ImmTyMatrixBScaleFmt,
184 ImmTyMatrixAReuse,
185 ImmTyMatrixBReuse,
186 ImmTyScaleSel,
187 ImmTyByteSel,
188 };
189
190private:
191 struct TokOp {
192 const char *Data;
193 unsigned Length;
194 };
195
196 struct ImmOp {
197 int64_t Val;
198 ImmTy Type;
199 bool IsFPImm;
200 Modifiers Mods;
201 };
202
203 struct RegOp {
204 MCRegister RegNo;
205 Modifiers Mods;
206 };
207
208 union {
209 TokOp Tok;
210 ImmOp Imm;
211 RegOp Reg;
212 const MCExpr *Expr;
213 };
214
215 // The index of the associated MCInst operand.
216 mutable int MCOpIdx = -1;
217
218public:
219 bool isToken() const override { return Kind == Token; }
220
221 bool isSymbolRefExpr() const {
222 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
223 }
224
225 bool isImm() const override {
226 return Kind == Immediate;
227 }
228
229 bool isInlinableImm(MVT type) const;
230 bool isLiteralImm(MVT type) const;
231
232 bool isRegKind() const {
233 return Kind == Register;
234 }
235
236 bool isReg() const override {
237 return isRegKind() && !hasModifiers();
238 }
239
240 bool isRegOrInline(unsigned RCID, MVT type) const {
241 return isRegClass(RCID) || isInlinableImm(type);
242 }
243
244 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
245 return isRegOrInline(RCID, type) || isLiteralImm(type);
246 }
247
248 bool isRegOrImmWithInt16InputMods() const {
249 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
250 }
251
252 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
253 return isRegOrImmWithInputMods(
254 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
255 }
256
257 bool isRegOrImmWithInt32InputMods() const {
258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259 }
260
261 bool isRegOrInlineImmWithInt16InputMods() const {
262 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
263 }
264
265 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
266 return isRegOrInline(
267 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
268 }
269
270 bool isRegOrInlineImmWithInt32InputMods() const {
271 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
272 }
273
274 bool isRegOrImmWithInt64InputMods() const {
275 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
276 }
277
278 bool isRegOrImmWithFP16InputMods() const {
279 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
280 }
281
282 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
283 return isRegOrImmWithInputMods(
284 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
285 }
286
287 bool isRegOrImmWithFP32InputMods() const {
288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
289 }
290
291 bool isRegOrImmWithFP64InputMods() const {
292 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
293 }
294
295 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
296 return isRegOrInline(
297 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
298 }
299
300 bool isRegOrInlineImmWithFP32InputMods() const {
301 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
302 }
303
304 bool isRegOrInlineImmWithFP64InputMods() const {
305 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
306 }
307
308 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
309
310 bool isVRegWithFP32InputMods() const {
311 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
312 }
313
314 bool isVRegWithFP64InputMods() const {
315 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
316 }
317
318 bool isPackedFP16InputMods() const {
319 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
320 }
321
322 bool isPackedVGPRFP32InputMods() const {
323 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
324 }
325
326 bool isVReg() const {
327 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
328 isRegClass(AMDGPU::VReg_64RegClassID) ||
329 isRegClass(AMDGPU::VReg_96RegClassID) ||
330 isRegClass(AMDGPU::VReg_128RegClassID) ||
331 isRegClass(AMDGPU::VReg_160RegClassID) ||
332 isRegClass(AMDGPU::VReg_192RegClassID) ||
333 isRegClass(AMDGPU::VReg_256RegClassID) ||
334 isRegClass(AMDGPU::VReg_512RegClassID) ||
335 isRegClass(AMDGPU::VReg_1024RegClassID);
336 }
337
338 bool isVReg32() const {
339 return isRegClass(AMDGPU::VGPR_32RegClassID);
340 }
341
342 bool isVReg32OrOff() const {
343 return isOff() || isVReg32();
344 }
345
346 bool isNull() const {
347 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
348 }
349
350 bool isVRegWithInputMods() const;
351 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
352 template <bool IsFake16> bool isT16VRegWithInputMods() const;
353
354 bool isSDWAOperand(MVT type) const;
355 bool isSDWAFP16Operand() const;
356 bool isSDWAFP32Operand() const;
357 bool isSDWAInt16Operand() const;
358 bool isSDWAInt32Operand() const;
359
360 bool isImmTy(ImmTy ImmT) const {
361 return isImm() && Imm.Type == ImmT;
362 }
363
364 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
365
366 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
367
368 bool isImmModifier() const {
369 return isImm() && Imm.Type != ImmTyNone;
370 }
371
372 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
373 bool isDim() const { return isImmTy(ImmTyDim); }
374 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
375 bool isOff() const { return isImmTy(ImmTyOff); }
376 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
377 bool isOffen() const { return isImmTy(ImmTyOffen); }
378 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
379 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
380 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
381 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
382 bool isGDS() const { return isImmTy(ImmTyGDS); }
383 bool isLDS() const { return isImmTy(ImmTyLDS); }
384 bool isCPol() const { return isImmTy(ImmTyCPol); }
385 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
386 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
387 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
388 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
389 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
390 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
391 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
392 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
393 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
394 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
395 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
396 bool isTFE() const { return isImmTy(ImmTyTFE); }
397 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
398 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
399 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
400 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
401 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
402 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
403 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
404 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
405 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
406 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
407 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
408 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
409 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
410 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
411
412 bool isRegOrImm() const {
413 return isReg() || isImm();
414 }
415
416 bool isRegClass(unsigned RCID) const;
417
418 bool isInlineValue() const;
419
420 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
421 return isRegOrInline(RCID, type) && !hasModifiers();
422 }
423
424 bool isSCSrcB16() const {
425 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
426 }
427
428 bool isSCSrcV2B16() const {
429 return isSCSrcB16();
430 }
431
432 bool isSCSrc_b32() const {
433 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
434 }
435
436 bool isSCSrc_b64() const {
437 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
438 }
439
440 bool isBoolReg() const;
441
442 bool isSCSrcF16() const {
443 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
444 }
445
446 bool isSCSrcV2F16() const {
447 return isSCSrcF16();
448 }
449
450 bool isSCSrcF32() const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
452 }
453
454 bool isSCSrcF64() const {
455 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
456 }
457
458 bool isSSrc_b32() const {
459 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
460 }
461
462 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
463
464 bool isSSrcV2B16() const {
465 llvm_unreachable("cannot happen");
466 return isSSrc_b16();
467 }
468
469 bool isSSrc_b64() const {
470 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
471 // See isVSrc64().
472 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
473 (((const MCTargetAsmParser *)AsmParser)
474 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
475 isExpr());
476 }
477
478 bool isSSrc_f32() const {
479 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
480 }
481
482 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
483
484 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
485
486 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
487
488 bool isSSrcV2F16() const {
489 llvm_unreachable("cannot happen");
490 return isSSrc_f16();
491 }
492
493 bool isSSrcV2FP32() const {
494 llvm_unreachable("cannot happen");
495 return isSSrc_f32();
496 }
497
498 bool isSCSrcV2FP32() const {
499 llvm_unreachable("cannot happen");
500 return isSCSrcF32();
501 }
502
503 bool isSSrcV2INT32() const {
504 llvm_unreachable("cannot happen");
505 return isSSrc_b32();
506 }
507
508 bool isSCSrcV2INT32() const {
509 llvm_unreachable("cannot happen");
510 return isSCSrc_b32();
511 }
512
513 bool isSSrcOrLds_b32() const {
514 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
515 isLiteralImm(MVT::i32) || isExpr();
516 }
517
518 bool isVCSrc_b32() const {
519 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
520 }
521
522 bool isVCSrc_b32_Lo256() const {
523 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
524 }
525
526 bool isVCSrc_b64_Lo256() const {
527 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
528 }
529
530 bool isVCSrc_b64() const {
531 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
532 }
533
534 bool isVCSrcT_b16() const {
535 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
536 }
537
538 bool isVCSrcTB16_Lo128() const {
539 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
540 }
541
542 bool isVCSrcFake16B16_Lo128() const {
543 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
544 }
545
546 bool isVCSrc_b16() const {
547 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
548 }
549
550 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
551
552 bool isVCSrc_f32() const {
553 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
554 }
555
556 bool isVCSrc_f64() const {
557 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
558 }
559
560 bool isVCSrcTBF16() const {
561 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
562 }
563
564 bool isVCSrcT_f16() const {
565 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
566 }
567
568 bool isVCSrcT_bf16() const {
569 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
570 }
571
572 bool isVCSrcTBF16_Lo128() const {
573 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
574 }
575
576 bool isVCSrcTF16_Lo128() const {
577 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
578 }
579
580 bool isVCSrcFake16BF16_Lo128() const {
581 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
582 }
583
584 bool isVCSrcFake16F16_Lo128() const {
585 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
586 }
587
588 bool isVCSrc_bf16() const {
589 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
590 }
591
592 bool isVCSrc_f16() const {
593 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
594 }
595
596 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
597
598 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
599
600 bool isVSrc_b32() const {
601 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
602 }
603
604 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
605
606 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
607
608 bool isVSrcT_b16_Lo128() const {
609 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
610 }
611
612 bool isVSrcFake16_b16_Lo128() const {
613 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
614 }
615
616 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
617
618 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
619
620 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
621
622 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
623
624 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
625
626 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
627
628 bool isVSrc_f32() const {
629 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
630 }
631
632 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
633
634 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
635
636 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
637
638 bool isVSrcT_bf16_Lo128() const {
639 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
640 }
641
642 bool isVSrcT_f16_Lo128() const {
643 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
644 }
645
646 bool isVSrcFake16_bf16_Lo128() const {
647 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
648 }
649
650 bool isVSrcFake16_f16_Lo128() const {
651 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
652 }
653
654 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
655
656 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
657
658 bool isVSrc_v2bf16() const {
659 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
660 }
661
662 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
663
664 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
665
666 bool isVISrcB32() const {
667 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
668 }
669
670 bool isVISrcB16() const {
671 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
672 }
673
674 bool isVISrcV2B16() const {
675 return isVISrcB16();
676 }
677
678 bool isVISrcF32() const {
679 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
680 }
681
682 bool isVISrcF16() const {
683 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
684 }
685
686 bool isVISrcV2F16() const {
687 return isVISrcF16() || isVISrcB32();
688 }
689
690 bool isVISrc_64_bf16() const {
691 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
692 }
693
694 bool isVISrc_64_f16() const {
695 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
696 }
697
698 bool isVISrc_64_b32() const {
699 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
700 }
701
702 bool isVISrc_64B64() const {
703 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
704 }
705
706 bool isVISrc_64_f64() const {
707 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
708 }
709
710 bool isVISrc_64V2FP32() const {
711 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
712 }
713
714 bool isVISrc_64V2INT32() const {
715 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
716 }
717
718 bool isVISrc_256_b32() const {
719 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
720 }
721
722 bool isVISrc_256_f32() const {
723 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
724 }
725
726 bool isVISrc_256B64() const {
727 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
728 }
729
730 bool isVISrc_256_f64() const {
731 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
732 }
733
734 bool isVISrc_512_f64() const {
735 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
736 }
737
738 bool isVISrc_128B16() const {
739 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
740 }
741
742 bool isVISrc_128V2B16() const {
743 return isVISrc_128B16();
744 }
745
746 bool isVISrc_128_b32() const {
747 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
748 }
749
750 bool isVISrc_128_f32() const {
751 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
752 }
753
754 bool isVISrc_256V2FP32() const {
755 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
756 }
757
758 bool isVISrc_256V2INT32() const {
759 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
760 }
761
762 bool isVISrc_512_b32() const {
763 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
764 }
765
766 bool isVISrc_512B16() const {
767 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
768 }
769
770 bool isVISrc_512V2B16() const {
771 return isVISrc_512B16();
772 }
773
774 bool isVISrc_512_f32() const {
775 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
776 }
777
778 bool isVISrc_512F16() const {
779 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
780 }
781
782 bool isVISrc_512V2F16() const {
783 return isVISrc_512F16() || isVISrc_512_b32();
784 }
785
786 bool isVISrc_1024_b32() const {
787 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
788 }
789
790 bool isVISrc_1024B16() const {
791 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
792 }
793
794 bool isVISrc_1024V2B16() const {
795 return isVISrc_1024B16();
796 }
797
798 bool isVISrc_1024_f32() const {
799 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
800 }
801
802 bool isVISrc_1024F16() const {
803 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
804 }
805
806 bool isVISrc_1024V2F16() const {
807 return isVISrc_1024F16() || isVISrc_1024_b32();
808 }
809
810 bool isAISrcB32() const {
811 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
812 }
813
814 bool isAISrcB16() const {
815 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
816 }
817
818 bool isAISrcV2B16() const {
819 return isAISrcB16();
820 }
821
822 bool isAISrcF32() const {
823 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
824 }
825
826 bool isAISrcF16() const {
827 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
828 }
829
830 bool isAISrcV2F16() const {
831 return isAISrcF16() || isAISrcB32();
832 }
833
834 bool isAISrc_64B64() const {
835 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
836 }
837
838 bool isAISrc_64_f64() const {
839 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
840 }
841
842 bool isAISrc_128_b32() const {
843 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
844 }
845
846 bool isAISrc_128B16() const {
847 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
848 }
849
850 bool isAISrc_128V2B16() const {
851 return isAISrc_128B16();
852 }
853
854 bool isAISrc_128_f32() const {
855 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
856 }
857
858 bool isAISrc_128F16() const {
859 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
860 }
861
862 bool isAISrc_128V2F16() const {
863 return isAISrc_128F16() || isAISrc_128_b32();
864 }
865
866 bool isVISrc_128_bf16() const {
867 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
868 }
869
870 bool isVISrc_128_f16() const {
871 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
872 }
873
874 bool isVISrc_128V2F16() const {
875 return isVISrc_128_f16() || isVISrc_128_b32();
876 }
877
878 bool isAISrc_256B64() const {
879 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
880 }
881
882 bool isAISrc_256_f64() const {
883 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
884 }
885
886 bool isAISrc_512_b32() const {
887 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
888 }
889
890 bool isAISrc_512B16() const {
891 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
892 }
893
894 bool isAISrc_512V2B16() const {
895 return isAISrc_512B16();
896 }
897
898 bool isAISrc_512_f32() const {
899 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
900 }
901
902 bool isAISrc_512F16() const {
903 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
904 }
905
906 bool isAISrc_512V2F16() const {
907 return isAISrc_512F16() || isAISrc_512_b32();
908 }
909
910 bool isAISrc_1024_b32() const {
911 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
912 }
913
914 bool isAISrc_1024B16() const {
915 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
916 }
917
918 bool isAISrc_1024V2B16() const {
919 return isAISrc_1024B16();
920 }
921
922 bool isAISrc_1024_f32() const {
923 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
924 }
925
926 bool isAISrc_1024F16() const {
927 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
928 }
929
930 bool isAISrc_1024V2F16() const {
931 return isAISrc_1024F16() || isAISrc_1024_b32();
932 }
933
934 bool isKImmFP32() const {
935 return isLiteralImm(MVT::f32);
936 }
937
938 bool isKImmFP16() const {
939 return isLiteralImm(MVT::f16);
940 }
941
942 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
943
944 bool isMem() const override {
945 return false;
946 }
947
948 bool isExpr() const {
949 return Kind == Expression;
950 }
951
952 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
953
954 bool isSWaitCnt() const;
955 bool isDepCtr() const;
956 bool isSDelayALU() const;
957 bool isHwreg() const;
958 bool isSendMsg() const;
959 bool isSplitBarrier() const;
960 bool isSwizzle() const;
961 bool isSMRDOffset8() const;
962 bool isSMEMOffset() const;
963 bool isSMRDLiteralOffset() const;
964 bool isDPP8() const;
965 bool isDPPCtrl() const;
966 bool isBLGP() const;
967 bool isGPRIdxMode() const;
968 bool isS16Imm() const;
969 bool isU16Imm() const;
970 bool isEndpgm() const;
971
972 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
973 return [this, P]() { return P(*this); };
974 }
975
976 StringRef getToken() const {
977 assert(isToken());
978 return StringRef(Tok.Data, Tok.Length);
979 }
980
981 int64_t getImm() const {
982 assert(isImm());
983 return Imm.Val;
984 }
985
986 void setImm(int64_t Val) {
987 assert(isImm());
988 Imm.Val = Val;
989 }
990
991 ImmTy getImmTy() const {
992 assert(isImm());
993 return Imm.Type;
994 }
995
996 MCRegister getReg() const override {
997 assert(isRegKind());
998 return Reg.RegNo;
999 }
1000
1001 SMLoc getStartLoc() const override {
1002 return StartLoc;
1003 }
1004
1005 SMLoc getEndLoc() const override {
1006 return EndLoc;
1007 }
1008
1009 SMRange getLocRange() const {
1010 return SMRange(StartLoc, EndLoc);
1011 }
1012
1013 int getMCOpIdx() const { return MCOpIdx; }
1014
1015 Modifiers getModifiers() const {
1016 assert(isRegKind() || isImmTy(ImmTyNone));
1017 return isRegKind() ? Reg.Mods : Imm.Mods;
1018 }
1019
1020 void setModifiers(Modifiers Mods) {
1021 assert(isRegKind() || isImmTy(ImmTyNone));
1022 if (isRegKind())
1023 Reg.Mods = Mods;
1024 else
1025 Imm.Mods = Mods;
1026 }
1027
1028 bool hasModifiers() const {
1029 return getModifiers().hasModifiers();
1030 }
1031
1032 bool hasFPModifiers() const {
1033 return getModifiers().hasFPModifiers();
1034 }
1035
1036 bool hasIntModifiers() const {
1037 return getModifiers().hasIntModifiers();
1038 }
1039
1040 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1041
1042 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1043
1044 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1045
1046 void addRegOperands(MCInst &Inst, unsigned N) const;
1047
1048 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1049 if (isRegKind())
1050 addRegOperands(Inst, N);
1051 else
1052 addImmOperands(Inst, N);
1053 }
1054
1055 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1056 Modifiers Mods = getModifiers();
1057 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1058 if (isRegKind()) {
1059 addRegOperands(Inst, N);
1060 } else {
1061 addImmOperands(Inst, N, false);
1062 }
1063 }
1064
1065 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1066 assert(!hasIntModifiers());
1067 addRegOrImmWithInputModsOperands(Inst, N);
1068 }
1069
1070 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1071 assert(!hasFPModifiers());
1072 addRegOrImmWithInputModsOperands(Inst, N);
1073 }
1074
1075 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1076 Modifiers Mods = getModifiers();
1077 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1078 assert(isRegKind());
1079 addRegOperands(Inst, N);
1080 }
1081
1082 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1083 assert(!hasIntModifiers());
1084 addRegWithInputModsOperands(Inst, N);
1085 }
1086
1087 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1088 assert(!hasFPModifiers());
1089 addRegWithInputModsOperands(Inst, N);
1090 }
1091
1092 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1093 // clang-format off
1094 switch (Type) {
1095 case ImmTyNone: OS << "None"; break;
1096 case ImmTyGDS: OS << "GDS"; break;
1097 case ImmTyLDS: OS << "LDS"; break;
1098 case ImmTyOffen: OS << "Offen"; break;
1099 case ImmTyIdxen: OS << "Idxen"; break;
1100 case ImmTyAddr64: OS << "Addr64"; break;
1101 case ImmTyOffset: OS << "Offset"; break;
1102 case ImmTyInstOffset: OS << "InstOffset"; break;
1103 case ImmTyOffset0: OS << "Offset0"; break;
1104 case ImmTyOffset1: OS << "Offset1"; break;
1105 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1106 case ImmTyCPol: OS << "CPol"; break;
1107 case ImmTyIndexKey8bit: OS << "index_key"; break;
1108 case ImmTyIndexKey16bit: OS << "index_key"; break;
1109 case ImmTyIndexKey32bit: OS << "index_key"; break;
1110 case ImmTyTFE: OS << "TFE"; break;
1111 case ImmTyD16: OS << "D16"; break;
1112 case ImmTyFORMAT: OS << "FORMAT"; break;
1113 case ImmTyClamp: OS << "Clamp"; break;
1114 case ImmTyOModSI: OS << "OModSI"; break;
1115 case ImmTyDPP8: OS << "DPP8"; break;
1116 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1117 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1118 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1119 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1120 case ImmTyDppFI: OS << "DppFI"; break;
1121 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1122 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1123 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1124 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1125 case ImmTyDMask: OS << "DMask"; break;
1126 case ImmTyDim: OS << "Dim"; break;
1127 case ImmTyUNorm: OS << "UNorm"; break;
1128 case ImmTyDA: OS << "DA"; break;
1129 case ImmTyR128A16: OS << "R128A16"; break;
1130 case ImmTyA16: OS << "A16"; break;
1131 case ImmTyLWE: OS << "LWE"; break;
1132 case ImmTyOff: OS << "Off"; break;
1133 case ImmTyExpTgt: OS << "ExpTgt"; break;
1134 case ImmTyExpCompr: OS << "ExpCompr"; break;
1135 case ImmTyExpVM: OS << "ExpVM"; break;
1136 case ImmTyHwreg: OS << "Hwreg"; break;
1137 case ImmTySendMsg: OS << "SendMsg"; break;
1138 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1139 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1140 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1141 case ImmTyOpSel: OS << "OpSel"; break;
1142 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1143 case ImmTyNegLo: OS << "NegLo"; break;
1144 case ImmTyNegHi: OS << "NegHi"; break;
1145 case ImmTySwizzle: OS << "Swizzle"; break;
1146 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1147 case ImmTyHigh: OS << "High"; break;
1148 case ImmTyBLGP: OS << "BLGP"; break;
1149 case ImmTyCBSZ: OS << "CBSZ"; break;
1150 case ImmTyABID: OS << "ABID"; break;
1151 case ImmTyEndpgm: OS << "Endpgm"; break;
1152 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1153 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1154 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1155 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1156 case ImmTyBitOp3: OS << "BitOp3"; break;
1157 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1158 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1159 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1160 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1161 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1162 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1163 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1164 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1165 case ImmTyScaleSel: OS << "ScaleSel"; break;
1166 case ImmTyByteSel: OS << "ByteSel"; break;
1167 }
1168 // clang-format on
1169 }
1170
1171 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1172 switch (Kind) {
1173 case Register:
1174 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1175 << " mods: " << Reg.Mods << '>';
1176 break;
1177 case Immediate:
1178 OS << '<' << getImm();
1179 if (getImmTy() != ImmTyNone) {
1180 OS << " type: "; printImmTy(OS, getImmTy());
1181 }
1182 OS << " mods: " << Imm.Mods << '>';
1183 break;
1184 case Token:
1185 OS << '\'' << getToken() << '\'';
1186 break;
1187 case Expression:
1188 OS << "<expr ";
1189 MAI.printExpr(OS, *Expr);
1190 OS << '>';
1191 break;
1192 }
1193 }
1194
1195 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1196 int64_t Val, SMLoc Loc,
1197 ImmTy Type = ImmTyNone,
1198 bool IsFPImm = false) {
1199 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1200 Op->Imm.Val = Val;
1201 Op->Imm.IsFPImm = IsFPImm;
1202 Op->Imm.Type = Type;
1203 Op->Imm.Mods = Modifiers();
1204 Op->StartLoc = Loc;
1205 Op->EndLoc = Loc;
1206 return Op;
1207 }
1208
1209 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1210 StringRef Str, SMLoc Loc,
1211 bool HasExplicitEncodingSize = true) {
1212 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1213 Res->Tok.Data = Str.data();
1214 Res->Tok.Length = Str.size();
1215 Res->StartLoc = Loc;
1216 Res->EndLoc = Loc;
1217 return Res;
1218 }
1219
1220 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1221 MCRegister Reg, SMLoc S, SMLoc E) {
1222 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1223 Op->Reg.RegNo = Reg;
1224 Op->Reg.Mods = Modifiers();
1225 Op->StartLoc = S;
1226 Op->EndLoc = E;
1227 return Op;
1228 }
1229
1230 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1231 const class MCExpr *Expr, SMLoc S) {
1232 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1233 Op->Expr = Expr;
1234 Op->StartLoc = S;
1235 Op->EndLoc = S;
1236 return Op;
1237 }
1238};
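
// Illustrative sketch of how the parser typically builds an operand list with
// the factory helpers above; the Parser pointer and the SMLoc values NameLoc,
// S and E are placeholders that would come from the lexer in real use:
//   OperandVector Operands;
//   Operands.push_back(AMDGPUOperand::CreateToken(Parser, "v_mov_b32", NameLoc));
//   Operands.push_back(AMDGPUOperand::CreateReg(Parser, AMDGPU::VGPR0, S, E));
//   Operands.push_back(AMDGPUOperand::CreateImm(Parser, 1, S));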
1239
1240raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1241 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1242 return OS;
1243}
1244
1245//===----------------------------------------------------------------------===//
1246// AsmParser
1247//===----------------------------------------------------------------------===//
1248
1249// TODO: define GET_SUBTARGET_FEATURE_NAME
1250#define GET_REGISTER_MATCHER
1251#include "AMDGPUGenAsmMatcher.inc"
1252#undef GET_REGISTER_MATCHER
1253#undef GET_SUBTARGET_FEATURE_NAME
1254
1255// Holds info related to the current kernel, e.g. the count of SGPRs used.
1256// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
1257// next .amdgpu_hsa_kernel directive or at EOF.
1258class KernelScopeInfo {
1259 int SgprIndexUnusedMin = -1;
1260 int VgprIndexUnusedMin = -1;
1261 int AgprIndexUnusedMin = -1;
1262 MCContext *Ctx = nullptr;
1263 MCSubtargetInfo const *MSTI = nullptr;
1264
1265 void usesSgprAt(int i) {
1266 if (i >= SgprIndexUnusedMin) {
1267 SgprIndexUnusedMin = ++i;
1268 if (Ctx) {
1269 MCSymbol* const Sym =
1270 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1271 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1272 }
1273 }
1274 }
1275
1276 void usesVgprAt(int i) {
1277 if (i >= VgprIndexUnusedMin) {
1278 VgprIndexUnusedMin = ++i;
1279 if (Ctx) {
1280 MCSymbol* const Sym =
1281 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1282 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1283 VgprIndexUnusedMin);
1284 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1285 }
1286 }
1287 }
1288
1289 void usesAgprAt(int i) {
1290 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1291 if (!hasMAIInsts(*MSTI))
1292 return;
1293
1294 if (i >= AgprIndexUnusedMin) {
1295 AgprIndexUnusedMin = ++i;
1296 if (Ctx) {
1297 MCSymbol* const Sym =
1298 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1299 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1300
1301 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1302 MCSymbol* const vSym =
1303 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1304 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1305 VgprIndexUnusedMin);
1306 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1307 }
1308 }
1309 }
1310
1311public:
1312 KernelScopeInfo() = default;
1313
1314 void initialize(MCContext &Context) {
1315 Ctx = &Context;
1316 MSTI = Ctx->getSubtargetInfo();
1317
1318 usesSgprAt(SgprIndexUnusedMin = -1);
1319 usesVgprAt(VgprIndexUnusedMin = -1);
1320 if (hasMAIInsts(*MSTI)) {
1321 usesAgprAt(AgprIndexUnusedMin = -1);
1322 }
1323 }
1324
1325 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1326 unsigned RegWidth) {
1327 switch (RegKind) {
1328 case IS_SGPR:
1329 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1330 break;
1331 case IS_AGPR:
1332 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1333 break;
1334 case IS_VGPR:
1335 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1336 break;
1337 default:
1338 break;
1339 }
1340 }
1341};
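
// Illustrative sketch: usesRegister() keeps the ".kernel.*_count" symbols in
// sync while a kernel body is parsed. For example, after an operand such as
// s[6:7] is seen:
//   KernelScope.usesRegister(IS_SGPR, /*DwordRegIndex=*/6, /*RegWidth=*/64);
//   // ".kernel.sgpr_count" now evaluates to at least 8 (highest index + 1).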
1342
1343class AMDGPUAsmParser : public MCTargetAsmParser {
1344 MCAsmParser &Parser;
1345
1346 unsigned ForcedEncodingSize = 0;
1347 bool ForcedDPP = false;
1348 bool ForcedSDWA = false;
1349 KernelScopeInfo KernelScope;
1350 const unsigned HwMode;
1351
1352 /// @name Auto-generated Match Functions
1353 /// {
1354
1355#define GET_ASSEMBLER_HEADER
1356#include "AMDGPUGenAsmMatcher.inc"
1357
1358 /// }
1359
1360 /// Get the size of a register operand, in bytes.
1361 unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
1362 assert(OpNo < Desc.NumOperands);
1363 int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
1364 return getRegBitWidth(RCID) / 8;
1365 }
1366
1367private:
1368 void createConstantSymbol(StringRef Id, int64_t Val);
1369
1370 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1371 bool OutOfRangeError(SMRange Range);
1372 /// Calculate VGPR/SGPR blocks required for given target, reserved
1373 /// registers, and user-specified NextFreeXGPR values.
1374 ///
1375 /// \param Features [in] Target features, used for bug corrections.
1376 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1377 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1378 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1379 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1380 /// descriptor field, if valid.
1381 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1382 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1383 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1384 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1385 /// \param VGPRBlocks [out] Result VGPR block count.
1386 /// \param SGPRBlocks [out] Result SGPR block count.
1387 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1388 const MCExpr *FlatScrUsed, bool XNACKUsed,
1389 std::optional<bool> EnableWavefrontSize32,
1390 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1391 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1392 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1393 bool ParseDirectiveAMDGCNTarget();
1394 bool ParseDirectiveAMDHSACodeObjectVersion();
1395 bool ParseDirectiveAMDHSAKernel();
1396 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1397 bool ParseDirectiveAMDKernelCodeT();
1398 // TODO: Possibly make subtargetHasRegister const.
1399 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1400 bool ParseDirectiveAMDGPUHsaKernel();
1401
1402 bool ParseDirectiveISAVersion();
1403 bool ParseDirectiveHSAMetadata();
1404 bool ParseDirectivePALMetadataBegin();
1405 bool ParseDirectivePALMetadata();
1406 bool ParseDirectiveAMDGPULDS();
1407
1408 /// Common code to parse out a block of text (typically YAML) between start and
1409 /// end directives.
1410 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1411 const char *AssemblerDirectiveEnd,
1412 std::string &CollectString);
1413
1414 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1415 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1416 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1417 unsigned &RegNum, unsigned &RegWidth,
1418 bool RestoreOnFailure = false);
1419 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1420 unsigned &RegNum, unsigned &RegWidth,
1421 SmallVectorImpl<AsmToken> &Tokens);
1422 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1423 unsigned &RegWidth,
1424 SmallVectorImpl<AsmToken> &Tokens);
1425 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1426 unsigned &RegWidth,
1427 SmallVectorImpl<AsmToken> &Tokens);
1428 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1429 unsigned &RegWidth,
1430 SmallVectorImpl<AsmToken> &Tokens);
1431 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1432 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1433 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1434
1435 bool isRegister();
1436 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1437 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1438 void initializeGprCountSymbol(RegisterKind RegKind);
1439 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1440 unsigned RegWidth);
1441 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1442 bool IsAtomic);
1443
1444public:
1445 enum OperandMode {
1446 OperandMode_Default,
1447 OperandMode_NSA,
1448 };
1449
1450 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1451
1452 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1453 const MCInstrInfo &MII, const MCTargetOptions &Options)
1454 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser),
1455 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1457
1458 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1459
1460 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1461 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1462 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1463 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1464 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1465 } else {
1466 createConstantSymbol(".option.machine_version_major", ISA.Major);
1467 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1468 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1469 }
1470 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1471 initializeGprCountSymbol(IS_VGPR);
1472 initializeGprCountSymbol(IS_SGPR);
1473 } else
1474 KernelScope.initialize(getContext());
1475
1476 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1477 createConstantSymbol(Symbol, Code);
1478
1479 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1480 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1481 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1482 }
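
// Illustrative sketch: the constant symbols created above let assembly refer
// to the target symbolically. Under the HSA ABI, for example:
//   .if .amdgcn.gfx_generation_number == 10
//   ; GFX10-specific code
//   .endif
// Non-HSA targets expose the same data via the ".option.machine_version_*"
// symbols instead.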
1483
1484 bool hasMIMG_R128() const {
1485 return AMDGPU::hasMIMG_R128(getSTI());
1486 }
1487
1488 bool hasPackedD16() const {
1489 return AMDGPU::hasPackedD16(getSTI());
1490 }
1491
1492 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1493
1494 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1495
1496 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1497
1498 bool isSI() const {
1499 return AMDGPU::isSI(getSTI());
1500 }
1501
1502 bool isCI() const {
1503 return AMDGPU::isCI(getSTI());
1504 }
1505
1506 bool isVI() const {
1507 return AMDGPU::isVI(getSTI());
1508 }
1509
1510 bool isGFX9() const {
1511 return AMDGPU::isGFX9(getSTI());
1512 }
1513
1514 // TODO: isGFX90A is also true for GFX940. This needs to be cleaned up.
1515 bool isGFX90A() const {
1516 return AMDGPU::isGFX90A(getSTI());
1517 }
1518
1519 bool isGFX940() const {
1520 return AMDGPU::isGFX940(getSTI());
1521 }
1522
1523 bool isGFX9Plus() const {
1524 return AMDGPU::isGFX9Plus(getSTI());
1525 }
1526
1527 bool isGFX10() const {
1528 return AMDGPU::isGFX10(getSTI());
1529 }
1530
1531 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1532
1533 bool isGFX11() const {
1534 return AMDGPU::isGFX11(getSTI());
1535 }
1536
1537 bool isGFX11Plus() const {
1538 return AMDGPU::isGFX11Plus(getSTI());
1539 }
1540
1541 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1542
1543 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1544
1545 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1546
1547 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1548
1549 bool isGFX10_BEncoding() const {
1550 return AMDGPU::isGFX10_BEncoding(getSTI());
1551 }
1552
1553 bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
1554
1555 bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
1556
1557 bool hasInv2PiInlineImm() const {
1558 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1559 }
1560
1561 bool has64BitLiterals() const {
1562 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1563 }
1564
1565 bool hasFlatOffsets() const {
1566 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1567 }
1568
1569 bool hasTrue16Insts() const {
1570 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1571 }
1572
1573 bool hasArchitectedFlatScratch() const {
1574 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1575 }
1576
1577 bool hasSGPR102_SGPR103() const {
1578 return !isVI() && !isGFX9();
1579 }
1580
1581 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1582
1583 bool hasIntClamp() const {
1584 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1585 }
1586
1587 bool hasPartialNSAEncoding() const {
1588 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1589 }
1590
1591 bool hasGloballyAddressableScratch() const {
1592 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1593 }
1594
1595 unsigned getNSAMaxSize(bool HasSampler = false) const {
1596 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1597 }
1598
1599 unsigned getMaxNumUserSGPRs() const {
1600 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1601 }
1602
1603 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1604
1605 AMDGPUTargetStreamer &getTargetStreamer() {
1606 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1607 return static_cast<AMDGPUTargetStreamer &>(TS);
1608 }
1609
1610 MCContext &getContext() const {
1611 // We need this const_cast because for some reason getContext() is not const
1612 // in MCAsmParser.
1613 return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
1614 }
1615
1616 const MCRegisterInfo *getMRI() const {
1617 return getContext().getRegisterInfo();
1618 }
1619
1620 const MCInstrInfo *getMII() const {
1621 return &MII;
1622 }
1623
1624 // FIXME: This should not be used. Queries derived from
1625 // getAvailableFeatures() should be used instead.
1626 const FeatureBitset &getFeatureBits() const {
1627 return getSTI().getFeatureBits();
1628 }
1629
1630 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1631 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1632 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1633
1634 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1635 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1636 bool isForcedDPP() const { return ForcedDPP; }
1637 bool isForcedSDWA() const { return ForcedSDWA; }
1638 ArrayRef<unsigned> getMatchedVariants() const;
1639 StringRef getMatchedVariantName() const;
1640
1641 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1642 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1643 bool RestoreOnFailure);
1644 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1645 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1646 SMLoc &EndLoc) override;
1647 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1648 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1649 unsigned Kind) override;
1650 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1651 OperandVector &Operands, MCStreamer &Out,
1652 uint64_t &ErrorInfo,
1653 bool MatchingInlineAsm) override;
1654 bool ParseDirective(AsmToken DirectiveID) override;
1655 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1656 OperandMode Mode = OperandMode_Default);
1657 StringRef parseMnemonicSuffix(StringRef Name);
1658 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1659 SMLoc NameLoc, OperandVector &Operands) override;
1660 //bool ProcessInstruction(MCInst &Inst);
1661
1662 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1663
1664 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1665
1666 ParseStatus
1667 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1668 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1669 std::function<bool(int64_t &)> ConvertResult = nullptr);
1670
1671 ParseStatus parseOperandArrayWithPrefix(
1672 const char *Prefix, OperandVector &Operands,
1673 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1674 bool (*ConvertResult)(int64_t &) = nullptr);
1675
1676 ParseStatus
1677 parseNamedBit(StringRef Name, OperandVector &Operands,
1678 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1679 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1680 ParseStatus parseCPol(OperandVector &Operands);
1681 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1682 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1683 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1684 SMLoc &StringLoc);
1685 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1686 StringRef Name,
1687 ArrayRef<const char *> Ids,
1688 int64_t &IntVal);
1689 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1690 StringRef Name,
1691 ArrayRef<const char *> Ids,
1692 AMDGPUOperand::ImmTy Type);
1693
1694 bool isModifier();
1695 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1696 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1697 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1698 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1699 bool parseSP3NegModifier();
1700 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1701 LitModifier Lit = LitModifier::None);
1702 ParseStatus parseReg(OperandVector &Operands);
1703 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1704 LitModifier Lit = LitModifier::None);
1705 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1706 bool AllowImm = true);
1707 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1708 bool AllowImm = true);
1709 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1710 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1711 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1712 ParseStatus tryParseIndexKey(OperandVector &Operands,
1713 AMDGPUOperand::ImmTy ImmTy);
1714 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1715 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1716 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1717 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1718 AMDGPUOperand::ImmTy Type);
1719 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1720 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1721 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1722 AMDGPUOperand::ImmTy Type);
1723 ParseStatus parseMatrixAScale(OperandVector &Operands);
1724 ParseStatus parseMatrixBScale(OperandVector &Operands);
1725 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1726 AMDGPUOperand::ImmTy Type);
1727 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1728 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1729
1730 ParseStatus parseDfmtNfmt(int64_t &Format);
1731 ParseStatus parseUfmt(int64_t &Format);
1732 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1733 int64_t &Format);
1734 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1735 int64_t &Format);
1736 ParseStatus parseFORMAT(OperandVector &Operands);
1737 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1738 ParseStatus parseNumericFormat(int64_t &Format);
1739 ParseStatus parseFlatOffset(OperandVector &Operands);
1740 ParseStatus parseR128A16(OperandVector &Operands);
1741 ParseStatus parseBLGP(OperandVector &Operands);
1742 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1743 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1744
1745 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1746
1747 bool parseCnt(int64_t &IntVal);
1748 ParseStatus parseSWaitCnt(OperandVector &Operands);
1749
1750 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1751 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1752 ParseStatus parseDepCtr(OperandVector &Operands);
1753
1754 bool parseDelay(int64_t &Delay);
1755 ParseStatus parseSDelayALU(OperandVector &Operands);
1756
1757 ParseStatus parseHwreg(OperandVector &Operands);
1758
1759private:
1760 struct OperandInfoTy {
1761 SMLoc Loc;
1762 int64_t Val;
1763 bool IsSymbolic = false;
1764 bool IsDefined = false;
1765
1766 OperandInfoTy(int64_t Val) : Val(Val) {}
1767 };
1768
1769 struct StructuredOpField : OperandInfoTy {
1770 StringLiteral Id;
1771 StringLiteral Desc;
1772 unsigned Width;
1773 bool IsDefined = false;
1774
1775 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1776 int64_t Default)
1777 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1778 virtual ~StructuredOpField() = default;
1779
1780 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1781 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1782 return false;
1783 }
1784
1785 virtual bool validate(AMDGPUAsmParser &Parser) const {
1786 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1787 return Error(Parser, "not supported on this GPU");
1788 if (!isUIntN(Width, Val))
1789 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1790 return true;
1791 }
1792 };
1793
1794 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1795 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1796
1797 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1798 bool validateSendMsg(const OperandInfoTy &Msg,
1799 const OperandInfoTy &Op,
1800 const OperandInfoTy &Stream);
1801
1802 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1803 OperandInfoTy &Width);
1804
1805 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1806
1807 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1808 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1809 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1810
1811 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1812 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1813 const OperandVector &Operands) const;
1814 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1815 const OperandVector &Operands) const;
1816 SMLoc getInstLoc(const OperandVector &Operands) const;
1817
1818 bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
1819 const OperandVector &Operands);
1820 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1821 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1822 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1823 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1824 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1825 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1826 bool AsVOPD3);
1827 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1828 bool tryVOPD(const MCInst &Inst);
1829 bool tryVOPD3(const MCInst &Inst);
1830 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1831
1832 bool validateIntClampSupported(const MCInst &Inst);
1833 bool validateMIMGAtomicDMask(const MCInst &Inst);
1834 bool validateMIMGGatherDMask(const MCInst &Inst);
1835 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1836 bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1837 bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1838 bool validateMIMGD16(const MCInst &Inst);
1839 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1840 bool validateTensorR128(const MCInst &Inst);
1841 bool validateMIMGMSAA(const MCInst &Inst);
1842 bool validateOpSel(const MCInst &Inst);
1843 bool validateTrue16OpSel(const MCInst &Inst);
1844 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1845 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1846 bool validateVccOperand(MCRegister Reg) const;
1847 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1848 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1849 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1850 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1851 bool validateAGPRLdSt(const MCInst &Inst) const;
1852 bool validateVGPRAlign(const MCInst &Inst) const;
1853 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1854 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1855 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1856 bool validateDivScale(const MCInst &Inst);
1857 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1858 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1859 SMLoc IDLoc);
1860 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1861 const unsigned CPol);
1862 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1863 bool validateSetVgprMSB(const MCInst &Inst, const OperandVector &Operands);
1864 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1865 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1866 unsigned getConstantBusLimit(unsigned Opcode) const;
1867 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1868 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1869 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1870
1871 bool isSupportedMnemo(StringRef Mnemo,
1872 const FeatureBitset &FBS);
1873 bool isSupportedMnemo(StringRef Mnemo,
1874 const FeatureBitset &FBS,
1875 ArrayRef<unsigned> Variants);
1876 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1877
1878 bool isId(const StringRef Id) const;
1879 bool isId(const AsmToken &Token, const StringRef Id) const;
1880 bool isToken(const AsmToken::TokenKind Kind) const;
1881 StringRef getId() const;
1882 bool trySkipId(const StringRef Id);
1883 bool trySkipId(const StringRef Pref, const StringRef Id);
1884 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1885 bool trySkipToken(const AsmToken::TokenKind Kind);
1886 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1887 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1888 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1889
1890 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1891 AsmToken::TokenKind getTokenKind() const;
1892 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1894 StringRef getTokenStr() const;
1895 AsmToken peekToken(bool ShouldSkipSpace = true);
1896 AsmToken getToken() const;
1897 SMLoc getLoc() const;
1898 void lex();
1899
1900public:
1901 void onBeginOfFile() override;
1902 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1903
1904 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1905
1906 ParseStatus parseExpTgt(OperandVector &Operands);
1907 ParseStatus parseSendMsg(OperandVector &Operands);
1908 ParseStatus parseInterpSlot(OperandVector &Operands);
1909 ParseStatus parseInterpAttr(OperandVector &Operands);
1910 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1911 ParseStatus parseBoolReg(OperandVector &Operands);
1912
1913 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1914 const unsigned MaxVal, const Twine &ErrMsg,
1915 SMLoc &Loc);
1916 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1917 const unsigned MinVal,
1918 const unsigned MaxVal,
1919 const StringRef ErrMsg);
1920 ParseStatus parseSwizzle(OperandVector &Operands);
1921 bool parseSwizzleOffset(int64_t &Imm);
1922 bool parseSwizzleMacro(int64_t &Imm);
1923 bool parseSwizzleQuadPerm(int64_t &Imm);
1924 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1925 bool parseSwizzleBroadcast(int64_t &Imm);
1926 bool parseSwizzleSwap(int64_t &Imm);
1927 bool parseSwizzleReverse(int64_t &Imm);
1928 bool parseSwizzleFFT(int64_t &Imm);
1929 bool parseSwizzleRotate(int64_t &Imm);
1930
1931 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1932 int64_t parseGPRIdxMacro();
1933
1934 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1935 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1936
1937 ParseStatus parseOModSI(OperandVector &Operands);
1938
1939 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1940 OptionalImmIndexMap &OptionalIdx);
1941 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1942 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1943 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1944 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1945 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1946
1947 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1948 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1949 OptionalImmIndexMap &OptionalIdx);
1950 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1951 OptionalImmIndexMap &OptionalIdx);
1952
1953 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1954 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1955 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
1956
1957 bool parseDimId(unsigned &Encoding);
1958 ParseStatus parseDim(OperandVector &Operands);
1959 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1960 ParseStatus parseDPP8(OperandVector &Operands);
1961 ParseStatus parseDPPCtrl(OperandVector &Operands);
1962 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1963 int64_t parseDPPCtrlSel(StringRef Ctrl);
1964 int64_t parseDPPCtrlPerm();
1965 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1966 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1967 cvtDPP(Inst, Operands, true);
1968 }
1969 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1970 bool IsDPP8 = false);
1971 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1972 cvtVOP3DPP(Inst, Operands, true);
1973 }
1974
1975 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1976 AMDGPUOperand::ImmTy Type);
1977 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1978 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1979 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1980 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1981 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1982 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1983 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1984 uint64_t BasicInstType,
1985 bool SkipDstVcc = false,
1986 bool SkipSrcVcc = false);
1987
1988 ParseStatus parseEndpgm(OperandVector &Operands);
1989
1990 ParseStatus parseVOPD(OperandVector &Operands);
1991};
1992
1993} // end anonymous namespace
1994
1995 // May be called with an integer type of equivalent bitwidth.
1996static const fltSemantics *getFltSemantics(unsigned Size) {
1997 switch (Size) {
1998 case 4:
1999 return &APFloat::IEEEsingle();
2000 case 8:
2001 return &APFloat::IEEEdouble();
2002 case 2:
2003 return &APFloat::IEEEhalf();
2004 default:
2005 llvm_unreachable("unsupported fp type");
2006 }
2007}
2008
2009 static const fltSemantics *getFltSemantics(MVT VT) {
2010   return getFltSemantics(VT.getSizeInBits() / 8);
2011}
2012
2013 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
2014   switch (OperandType) {
2015 // When floating-point immediate is used as operand of type i16, the 32-bit
2016 // representation of the constant truncated to the 16 LSBs should be used.
2031 return &APFloat::IEEEsingle();
2038 return &APFloat::IEEEdouble();
2045 return &APFloat::IEEEhalf();
2050 return &APFloat::BFloat();
2051 default:
2052 llvm_unreachable("unsupported fp type");
2053 }
2054}
2055
2056//===----------------------------------------------------------------------===//
2057// Operand
2058//===----------------------------------------------------------------------===//
2059
2060static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2061 bool Lost;
2062
2063 // Convert literal to single precision
2064   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
2065                                                APFloat::rmNearestTiesToEven,
2066                                                &Lost);
2067  // We allow precision loss but not overflow or underflow
2068 if (Status != APFloat::opOK &&
2069 Lost &&
2070 ((Status & APFloat::opOverflow) != 0 ||
2071 (Status & APFloat::opUnderflow) != 0)) {
2072 return false;
2073 }
2074
2075 return true;
2076}
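// Illustrative sketch (not part of the parser): the same accept/reject policy
// exercised directly through APFloat. Converting 0.1 to f16 is merely inexact
// and would be accepted; converting 1.0e10 to f16 overflows and would be
// rejected. The function name below is hypothetical.
static void losslessConversionSketch() {
  bool Lost = false;

  APFloat Inexact(0.1);
  APFloat::opStatus InexactStatus = Inexact.convert(
      APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Lost);
  (void)InexactStatus; // opInexact with Lost == true: precision loss only.

  APFloat Overflowing(1.0e10);
  APFloat::opStatus OverflowStatus = Overflowing.convert(
      APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Lost);
  (void)OverflowStatus; // opOverflow is set: rejected by the check above.
}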
2077
2078static bool isSafeTruncation(int64_t Val, unsigned Size) {
2079 return isUIntN(Size, Val) || isIntN(Size, Val);
2080}
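// Hedged illustration of the rule above: a 64-bit immediate may be narrowed
// to an N-bit field when it round-trips as either an unsigned or a signed
// N-bit value. The function below exists only for this example.
static void safeTruncationExamples() {
  assert(isSafeTruncation(0xFFFF, 16));   // fits as an unsigned 16-bit value
  assert(isSafeTruncation(-1, 16));       // fits as a signed 16-bit value
  assert(!isSafeTruncation(0x1FFFF, 16)); // needs 17 bits either way
}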
2081
2082static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2083 if (VT.getScalarType() == MVT::i16)
2084 return isInlinableLiteral32(Val, HasInv2Pi);
2085
2086 if (VT.getScalarType() == MVT::f16)
2087 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2088
2089 assert(VT.getScalarType() == MVT::bf16);
2090
2091 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2092}
2093
2094bool AMDGPUOperand::isInlinableImm(MVT type) const {
2095
2096 // This is a hack to enable named inline values like
2097 // shared_base with both 32-bit and 64-bit operands.
2098 // Note that these values are defined as
2099 // 32-bit operands only.
2100 if (isInlineValue()) {
2101 return true;
2102 }
2103
2104 if (!isImmTy(ImmTyNone)) {
2105 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2106 return false;
2107 }
2108
2109 if (getModifiers().Lit != LitModifier::None)
2110 return false;
2111
2112   // TODO: We should avoid using host floats here. It would be better to
2113   // check the float bit values, which is what a few other places do.
2114   // We've had bot failures before due to weird NaN support on MIPS hosts.
2115
2116 APInt Literal(64, Imm.Val);
2117
2118 if (Imm.IsFPImm) { // We got fp literal token
2119 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2120       return AMDGPU::isInlinableLiteral64(Imm.Val,
2121                                           AsmParser->hasInv2PiInlineImm());
2122 }
2123
2124 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2125 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2126 return false;
2127
2128 if (type.getScalarSizeInBits() == 16) {
2129 bool Lost = false;
2130 switch (type.getScalarType().SimpleTy) {
2131 default:
2132 llvm_unreachable("unknown 16-bit type");
2133 case MVT::bf16:
2134 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2135 &Lost);
2136 break;
2137 case MVT::f16:
2138 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2139 &Lost);
2140 break;
2141 case MVT::i16:
2142 FPLiteral.convert(APFloatBase::IEEEsingle(),
2143 APFloat::rmNearestTiesToEven, &Lost);
2144 break;
2145 }
2146       // We need to use the 32-bit representation here because when a
2147       // floating-point inline constant is used as an i16 operand, its 32-bit
2148       // representation will be used. We will need the 32-bit value to check
2149       // whether it is an FP inline constant.
2150 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2151 return isInlineableLiteralOp16(ImmVal, type,
2152 AsmParser->hasInv2PiInlineImm());
2153 }
2154
2155 // Check if single precision literal is inlinable
2156     return AMDGPU::isInlinableLiteral32(
2157         static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2158 AsmParser->hasInv2PiInlineImm());
2159 }
2160
2161 // We got int literal token.
2162 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2163     return AMDGPU::isInlinableLiteral64(Imm.Val,
2164                                         AsmParser->hasInv2PiInlineImm());
2165 }
2166
2167 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2168 return false;
2169 }
2170
2171 if (type.getScalarSizeInBits() == 16) {
2172     return isInlineableLiteralOp16(
2173         static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2174 type, AsmParser->hasInv2PiInlineImm());
2175 }
2176
2177   return AMDGPU::isInlinableLiteral32(
2178       static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2179 AsmParser->hasInv2PiInlineImm());
2180}
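// Illustrative probe (not part of the parser) of what the checks above treat
// as 32-bit inline constants: the small integers -16..64 and a fixed set of
// FP values (0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0, plus 1/(2*pi) when the
// subtarget has the inv2pi inline immediate). The function name is
// hypothetical.
static void inlineConstantProbe(bool HasInv2Pi) {
  (void)AMDGPU::isInlinableLiteral32(64, HasInv2Pi);         // true: largest inline integer
  (void)AMDGPU::isInlinableLiteral32(65, HasInv2Pi);         // false: requires a literal
  (void)AMDGPU::isInlinableLiteral32(0x3F000000, HasInv2Pi); // true: bit pattern of 0.5f
}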
2181
2182bool AMDGPUOperand::isLiteralImm(MVT type) const {
2183 // Check that this immediate can be added as literal
2184 if (!isImmTy(ImmTyNone)) {
2185 return false;
2186 }
2187
2188 bool Allow64Bit =
2189 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2190
2191 if (!Imm.IsFPImm) {
2192 // We got int literal token.
2193
2194 if (type == MVT::f64 && hasFPModifiers()) {
2195       // FP modifiers cannot be applied to int literals while preserving the
2196       // same semantics for VOP1/2/C and VOP3, because of integer truncation.
2197       // To avoid ambiguity, these cases are disabled.
2198 return false;
2199 }
2200
2201 unsigned Size = type.getSizeInBits();
2202 if (Size == 64) {
2203 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2204 return true;
2205 Size = 32;
2206 }
2207
2208 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2209 // types.
2210 return isSafeTruncation(Imm.Val, Size);
2211 }
2212
2213 // We got fp literal token
2214 if (type == MVT::f64) { // Expected 64-bit fp operand
2215     // We would set the low 64 bits of the literal to zeroes, but we accept such literals.
2216 return true;
2217 }
2218
2219 if (type == MVT::i64) { // Expected 64-bit int operand
2220 // We don't allow fp literals in 64-bit integer instructions. It is
2221 // unclear how we should encode them.
2222 return false;
2223 }
2224
2225 // We allow fp literals with f16x2 operands assuming that the specified
2226 // literal goes into the lower half and the upper half is zero. We also
2227 // require that the literal may be losslessly converted to f16.
2228 //
2229 // For i16x2 operands, we assume that the specified literal is encoded as a
2230 // single-precision float. This is pretty odd, but it matches SP3 and what
2231 // happens in hardware.
2232 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2233 : (type == MVT::v2i16) ? MVT::f32
2234 : (type == MVT::v2f32) ? MVT::f32
2235 : type;
2236
2237 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2238 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2239}
2240
2241bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2242 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2243}
2244
2245bool AMDGPUOperand::isVRegWithInputMods() const {
2246 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2247 // GFX90A allows DPP on 64-bit operands.
2248 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2249 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2250}
2251
2252template <bool IsFake16>
2253bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2254 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2255 : AMDGPU::VGPR_16_Lo128RegClassID);
2256}
2257
2258template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2259 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2260 : AMDGPU::VGPR_16RegClassID);
2261}
2262
2263bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2264 if (AsmParser->isVI())
2265 return isVReg32();
2266 if (AsmParser->isGFX9Plus())
2267 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2268 return false;
2269}
2270
2271bool AMDGPUOperand::isSDWAFP16Operand() const {
2272 return isSDWAOperand(MVT::f16);
2273}
2274
2275bool AMDGPUOperand::isSDWAFP32Operand() const {
2276 return isSDWAOperand(MVT::f32);
2277}
2278
2279bool AMDGPUOperand::isSDWAInt16Operand() const {
2280 return isSDWAOperand(MVT::i16);
2281}
2282
2283bool AMDGPUOperand::isSDWAInt32Operand() const {
2284 return isSDWAOperand(MVT::i32);
2285}
2286
2287bool AMDGPUOperand::isBoolReg() const {
2288 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2289 (AsmParser->isWave32() && isSCSrc_b32()));
2290}
2291
2292uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2293{
2294 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2295 assert(Size == 2 || Size == 4 || Size == 8);
2296
2297 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2298
2299 if (Imm.Mods.Abs) {
2300 Val &= ~FpSignMask;
2301 }
2302 if (Imm.Mods.Neg) {
2303 Val ^= FpSignMask;
2304 }
2305
2306 return Val;
2307}
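// A minimal standalone sketch of the bit manipulation above: abs clears the
// sign bit of the raw FP pattern and neg toggles it; for a 4-byte operand the
// sign mask is 1 << 31. The helper is hypothetical and mirrors the member
// function for illustration only.
static uint64_t applyFPModsSketch(uint64_t Bits, unsigned Size, bool Abs,
                                  bool Neg) {
  const uint64_t SignMask = 1ULL << (Size * 8 - 1);
  if (Abs)
    Bits &= ~SignMask; // |x|: force the sign bit to zero
  if (Neg)
    Bits ^= SignMask;  // -x: flip the sign bit
  return Bits;
}
// Example: -|x| applied to the f32 pattern of 1.0 (0x3F800000) yields
// 0xBF800000, i.e. -1.0.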
2308
2309void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2310 MCOpIdx = Inst.getNumOperands();
2311
2312 if (isExpr()) {
2313     Inst.addOperand(MCOperand::createExpr(Expr));
2314     return;
2315 }
2316
2317 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2318 Inst.getNumOperands())) {
2319 addLiteralImmOperand(Inst, Imm.Val,
2320 ApplyModifiers &
2321 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2322 } else {
2323 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2324     Inst.addOperand(MCOperand::createImm(Imm.Val));
2325   }
2326}
2327
2328void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2329 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2330 auto OpNum = Inst.getNumOperands();
2331 // Check that this operand accepts literals
2332 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2333
2334 if (ApplyModifiers) {
2335 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2336 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2337 Val = applyInputFPModifiers(Val, Size);
2338 }
2339
2340 APInt Literal(64, Val);
2341 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2342
2343 bool CanUse64BitLiterals =
2344 AsmParser->has64BitLiterals() &&
2345 !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
2346 LitModifier Lit = getModifiers().Lit;
2347 MCContext &Ctx = AsmParser->getContext();
2348
2349 if (Imm.IsFPImm) { // We got fp literal token
2350 switch (OpTy) {
2356 if (Lit == LitModifier::None &&
2358 AsmParser->hasInv2PiInlineImm())) {
2359 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2360 return;
2361 }
2362
2363 // Non-inlineable
2364 if (AMDGPU::isSISrcFPOperand(InstDesc,
2365 OpNum)) { // Expected 64-bit fp operand
2366 bool HasMandatoryLiteral =
2367 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2368 // For fp operands we check if low 32 bits are zeros
2369 if (Literal.getLoBits(32) != 0 &&
2370 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2371 !HasMandatoryLiteral) {
2372 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2373 Inst.getLoc(),
2374 "Can't encode literal as exact 64-bit floating-point operand. "
2375 "Low 32-bits will be set to zero");
2376 Val &= 0xffffffff00000000u;
2377 }
2378
2379 if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
2382 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2383 (isInt<32>(Val) || isUInt<32>(Val))) {
2384           // The floating-point operand will be verbalized as an
2385           // integer one. If that integer happens to fit in 32 bits, on
2386           // re-assembling it would be interpreted as the high half of
2387           // the actual value, so we have to wrap it into lit64().
2388 Lit = LitModifier::Lit64;
2389 } else if (Lit == LitModifier::Lit) {
2390 // For FP64 operands lit() specifies the high half of the value.
2391 Val = Hi_32(Val);
2392 }
2393 }
2394 break;
2395 }
2396
2397 // We don't allow fp literals in 64-bit integer instructions. It is
2398 // unclear how we should encode them. This case should be checked earlier
2399 // in predicate methods (isLiteralImm())
2400 llvm_unreachable("fp literal in 64-bit integer instruction.");
2401
2403 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2404 (isInt<32>(Val) || isUInt<32>(Val)))
2405 Lit = LitModifier::Lit64;
2406 break;
2407
2412 if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2413 Literal == 0x3fc45f306725feed) {
2414           // This is 1/(2*pi), which is going to be truncated to bf16 with a
2415           // loss of precision. The constant represents the idiomatic fp32 value
2416           // of 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16
2417           // bits cleared. Prevent the rounding below.
2418 Inst.addOperand(MCOperand::createImm(0x3e22));
2419 return;
2420 }
2421 [[fallthrough]];
2422
2443 bool lost;
2444 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2445 // Convert literal to single precision
2446 FPLiteral.convert(*getOpFltSemantics(OpTy),
2447 APFloat::rmNearestTiesToEven, &lost);
2448     // We allow precision loss but not overflow or underflow. This should be
2449 // checked earlier in isLiteralImm()
2450
2451 Val = FPLiteral.bitcastToAPInt().getZExtValue();
2452 break;
2453 }
2454 default:
2455 llvm_unreachable("invalid operand size");
2456 }
2457
2458 if (Lit != LitModifier::None) {
2459 Inst.addOperand(
2461 } else {
2463 }
2464 return;
2465 }
2466
2467 // We got int literal token.
2468 // Only sign extend inline immediates.
2469 switch (OpTy) {
2483 break;
2484
2487 if (Lit == LitModifier::None &&
2488 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2489       Inst.addOperand(MCOperand::createImm(Val));
2490       return;
2491 }
2492
2493 // When the 32 MSBs are not zero (effectively means it can't be safely
2494 // truncated to uint32_t), if the target doesn't support 64-bit literals, or
2495 // the lit modifier is explicitly used, we need to truncate it to the 32
2496 // LSBs.
2497 if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
2498 Val = Lo_32(Val);
2499 break;
2500
2504 if (Lit == LitModifier::None &&
2505 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2506       Inst.addOperand(MCOperand::createImm(Val));
2507       return;
2508 }
2509
2510 // If the target doesn't support 64-bit literals, we need to use the
2511 // constant as the high 32 MSBs of a double-precision floating point value.
2512 if (!AsmParser->has64BitLiterals()) {
2513 Val = static_cast<uint64_t>(Val) << 32;
2514 } else {
2515       // Now that the target supports 64-bit literals, there are two cases
2516       // where we still want to use the src_literal encoding:
2517       // 1) it is explicitly forced by the lit modifier;
2518       // 2) the value has a valid 32-bit representation (signed or unsigned)
2519       //    and is not forced by the lit64 modifier.
2520 if (Lit == LitModifier::Lit ||
2521 (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2522 Val = static_cast<uint64_t>(Val) << 32;
2523 }
2524
2525 // For FP64 operands lit() specifies the high half of the value.
2526 if (Lit == LitModifier::Lit)
2527 Val = Hi_32(Val);
2528 break;
2529
2541 break;
2542
2544 if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
2545 Val <<= 32;
2546 break;
2547
2548 default:
2549 llvm_unreachable("invalid operand type");
2550 }
2551
2552 if (Lit != LitModifier::None) {
2553 Inst.addOperand(
2555 } else {
2557 }
2558}
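// Hedged illustration of the FP64 literal split discussed above: a double
// whose low 32 bits are zero can travel in a single 32-bit literal holding
// the high half, and decoding pads the low half with zeroes. Hi_32 is the
// MathExtras helper already used in this function; the sketch itself is not
// part of the parser.
static void fp64LiteralSplitSketch() {
  uint64_t Bits = 0x4000000000000000ULL; // 2.0 as an IEEE-754 double
  uint32_t Encoded = Hi_32(Bits);        // 0x40000000 is what gets emitted
  uint64_t Rebuilt = uint64_t(Encoded) << 32;
  (void)Rebuilt;                         // Rebuilt == Bits; nothing was lost
}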
2559
2560void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2561 MCOpIdx = Inst.getNumOperands();
2562 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2563}
2564
2565bool AMDGPUOperand::isInlineValue() const {
2566 return isRegKind() && ::isInlineValue(getReg());
2567}
2568
2569//===----------------------------------------------------------------------===//
2570// AsmParser
2571//===----------------------------------------------------------------------===//
2572
2573void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2574   // TODO: make these pre-defined variables read-only.
2575   // Currently there is no suitable machinery in core llvm-mc for this.
2576   // MCSymbol::isRedefinable is intended for another purpose, and
2577   // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2578 MCContext &Ctx = getContext();
2579 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2581}
2582
2583static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2584 if (Is == IS_VGPR) {
2585 switch (RegWidth) {
2586 default: return -1;
2587 case 32:
2588 return AMDGPU::VGPR_32RegClassID;
2589 case 64:
2590 return AMDGPU::VReg_64RegClassID;
2591 case 96:
2592 return AMDGPU::VReg_96RegClassID;
2593 case 128:
2594 return AMDGPU::VReg_128RegClassID;
2595 case 160:
2596 return AMDGPU::VReg_160RegClassID;
2597 case 192:
2598 return AMDGPU::VReg_192RegClassID;
2599 case 224:
2600 return AMDGPU::VReg_224RegClassID;
2601 case 256:
2602 return AMDGPU::VReg_256RegClassID;
2603 case 288:
2604 return AMDGPU::VReg_288RegClassID;
2605 case 320:
2606 return AMDGPU::VReg_320RegClassID;
2607 case 352:
2608 return AMDGPU::VReg_352RegClassID;
2609 case 384:
2610 return AMDGPU::VReg_384RegClassID;
2611 case 512:
2612 return AMDGPU::VReg_512RegClassID;
2613 case 1024:
2614 return AMDGPU::VReg_1024RegClassID;
2615 }
2616 } else if (Is == IS_TTMP) {
2617 switch (RegWidth) {
2618 default: return -1;
2619 case 32:
2620 return AMDGPU::TTMP_32RegClassID;
2621 case 64:
2622 return AMDGPU::TTMP_64RegClassID;
2623 case 128:
2624 return AMDGPU::TTMP_128RegClassID;
2625 case 256:
2626 return AMDGPU::TTMP_256RegClassID;
2627 case 512:
2628 return AMDGPU::TTMP_512RegClassID;
2629 }
2630 } else if (Is == IS_SGPR) {
2631 switch (RegWidth) {
2632 default: return -1;
2633 case 32:
2634 return AMDGPU::SGPR_32RegClassID;
2635 case 64:
2636 return AMDGPU::SGPR_64RegClassID;
2637 case 96:
2638 return AMDGPU::SGPR_96RegClassID;
2639 case 128:
2640 return AMDGPU::SGPR_128RegClassID;
2641 case 160:
2642 return AMDGPU::SGPR_160RegClassID;
2643 case 192:
2644 return AMDGPU::SGPR_192RegClassID;
2645 case 224:
2646 return AMDGPU::SGPR_224RegClassID;
2647 case 256:
2648 return AMDGPU::SGPR_256RegClassID;
2649 case 288:
2650 return AMDGPU::SGPR_288RegClassID;
2651 case 320:
2652 return AMDGPU::SGPR_320RegClassID;
2653 case 352:
2654 return AMDGPU::SGPR_352RegClassID;
2655 case 384:
2656 return AMDGPU::SGPR_384RegClassID;
2657 case 512:
2658 return AMDGPU::SGPR_512RegClassID;
2659 }
2660 } else if (Is == IS_AGPR) {
2661 switch (RegWidth) {
2662 default: return -1;
2663 case 32:
2664 return AMDGPU::AGPR_32RegClassID;
2665 case 64:
2666 return AMDGPU::AReg_64RegClassID;
2667 case 96:
2668 return AMDGPU::AReg_96RegClassID;
2669 case 128:
2670 return AMDGPU::AReg_128RegClassID;
2671 case 160:
2672 return AMDGPU::AReg_160RegClassID;
2673 case 192:
2674 return AMDGPU::AReg_192RegClassID;
2675 case 224:
2676 return AMDGPU::AReg_224RegClassID;
2677 case 256:
2678 return AMDGPU::AReg_256RegClassID;
2679 case 288:
2680 return AMDGPU::AReg_288RegClassID;
2681 case 320:
2682 return AMDGPU::AReg_320RegClassID;
2683 case 352:
2684 return AMDGPU::AReg_352RegClassID;
2685 case 384:
2686 return AMDGPU::AReg_384RegClassID;
2687 case 512:
2688 return AMDGPU::AReg_512RegClassID;
2689 case 1024:
2690 return AMDGPU::AReg_1024RegClassID;
2691 }
2692 }
2693 return -1;
2694}
2695
2696 static MCRegister getSpecialRegForName(StringRef RegName) {
2697   return StringSwitch<MCRegister>(RegName)
2698     .Case("exec", AMDGPU::EXEC)
2699 .Case("vcc", AMDGPU::VCC)
2700 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2701 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2702 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2703 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2704 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2705 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2706 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2707 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2708 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2709 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2710 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2711 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2712 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2713 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2714 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2715 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2716 .Case("m0", AMDGPU::M0)
2717 .Case("vccz", AMDGPU::SRC_VCCZ)
2718 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2719 .Case("execz", AMDGPU::SRC_EXECZ)
2720 .Case("src_execz", AMDGPU::SRC_EXECZ)
2721 .Case("scc", AMDGPU::SRC_SCC)
2722 .Case("src_scc", AMDGPU::SRC_SCC)
2723 .Case("tba", AMDGPU::TBA)
2724 .Case("tma", AMDGPU::TMA)
2725 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2726 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2727 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2728 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2729 .Case("vcc_lo", AMDGPU::VCC_LO)
2730 .Case("vcc_hi", AMDGPU::VCC_HI)
2731 .Case("exec_lo", AMDGPU::EXEC_LO)
2732 .Case("exec_hi", AMDGPU::EXEC_HI)
2733 .Case("tma_lo", AMDGPU::TMA_LO)
2734 .Case("tma_hi", AMDGPU::TMA_HI)
2735 .Case("tba_lo", AMDGPU::TBA_LO)
2736 .Case("tba_hi", AMDGPU::TBA_HI)
2737 .Case("pc", AMDGPU::PC_REG)
2738 .Case("null", AMDGPU::SGPR_NULL)
2739 .Default(AMDGPU::NoRegister);
2740}
2741
2742bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2743 SMLoc &EndLoc, bool RestoreOnFailure) {
2744 auto R = parseRegister();
2745 if (!R) return true;
2746 assert(R->isReg());
2747 RegNo = R->getReg();
2748 StartLoc = R->getStartLoc();
2749 EndLoc = R->getEndLoc();
2750 return false;
2751}
2752
2753bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2754 SMLoc &EndLoc) {
2755 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2756}
2757
2758ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2759 SMLoc &EndLoc) {
2760 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2761 bool PendingErrors = getParser().hasPendingError();
2762 getParser().clearPendingErrors();
2763 if (PendingErrors)
2764 return ParseStatus::Failure;
2765 if (Result)
2766 return ParseStatus::NoMatch;
2767 return ParseStatus::Success;
2768}
2769
2770bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2771 RegisterKind RegKind,
2772 MCRegister Reg1, SMLoc Loc) {
2773 switch (RegKind) {
2774 case IS_SPECIAL:
2775 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2776 Reg = AMDGPU::EXEC;
2777 RegWidth = 64;
2778 return true;
2779 }
2780 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2781 Reg = AMDGPU::FLAT_SCR;
2782 RegWidth = 64;
2783 return true;
2784 }
2785 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2786 Reg = AMDGPU::XNACK_MASK;
2787 RegWidth = 64;
2788 return true;
2789 }
2790 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2791 Reg = AMDGPU::VCC;
2792 RegWidth = 64;
2793 return true;
2794 }
2795 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2796 Reg = AMDGPU::TBA;
2797 RegWidth = 64;
2798 return true;
2799 }
2800 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2801 Reg = AMDGPU::TMA;
2802 RegWidth = 64;
2803 return true;
2804 }
2805 Error(Loc, "register does not fit in the list");
2806 return false;
2807 case IS_VGPR:
2808 case IS_SGPR:
2809 case IS_AGPR:
2810 case IS_TTMP:
2811 if (Reg1 != Reg + RegWidth / 32) {
2812 Error(Loc, "registers in a list must have consecutive indices");
2813 return false;
2814 }
2815 RegWidth += 32;
2816 return true;
2817 default:
2818 llvm_unreachable("unexpected register kind");
2819 }
2820}
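// Sketch of the list-growing rule above: after k registers have been accepted
// the running width is 32*k bits, so the next register must have index
// First + RegWidth/32 for the list to remain consecutive. The helper below is
// illustrative only.
static bool nextListIndexIsConsecutive(unsigned FirstIdx, unsigned CurWidthBits,
                                       unsigned NextIdx) {
  return NextIdx == FirstIdx + CurWidthBits / 32;
}
// e.g. for [v0,v1,v2]: after v0 and v1 the width is 64, so only v2
// (index 0 + 64/32) may come next; [v0,v2] is rejected.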
2821
2822struct RegInfo {
2823   StringLiteral Name;
2824   RegisterKind Kind;
2825};
2826
2827static constexpr RegInfo RegularRegisters[] = {
2828 {{"v"}, IS_VGPR},
2829 {{"s"}, IS_SGPR},
2830 {{"ttmp"}, IS_TTMP},
2831 {{"acc"}, IS_AGPR},
2832 {{"a"}, IS_AGPR},
2833};
2834
2835static bool isRegularReg(RegisterKind Kind) {
2836 return Kind == IS_VGPR ||
2837 Kind == IS_SGPR ||
2838 Kind == IS_TTMP ||
2839 Kind == IS_AGPR;
2840}
2841
2842 static const RegInfo *getRegularRegInfo(StringRef Str) {
2843   for (const RegInfo &Reg : RegularRegisters)
2844 if (Str.starts_with(Reg.Name))
2845 return &Reg;
2846 return nullptr;
2847}
2848
2849static bool getRegNum(StringRef Str, unsigned& Num) {
2850 return !Str.getAsInteger(10, Num);
2851}
2852
2853bool
2854AMDGPUAsmParser::isRegister(const AsmToken &Token,
2855 const AsmToken &NextToken) const {
2856
2857 // A list of consecutive registers: [s0,s1,s2,s3]
2858 if (Token.is(AsmToken::LBrac))
2859 return true;
2860
2861 if (!Token.is(AsmToken::Identifier))
2862 return false;
2863
2864 // A single register like s0 or a range of registers like s[0:1]
2865
2866 StringRef Str = Token.getString();
2867 const RegInfo *Reg = getRegularRegInfo(Str);
2868 if (Reg) {
2869 StringRef RegName = Reg->Name;
2870 StringRef RegSuffix = Str.substr(RegName.size());
2871 if (!RegSuffix.empty()) {
2872 RegSuffix.consume_back(".l");
2873 RegSuffix.consume_back(".h");
2874 unsigned Num;
2875 // A single register with an index: rXX
2876 if (getRegNum(RegSuffix, Num))
2877 return true;
2878 } else {
2879 // A range of registers: r[XX:YY].
2880 if (NextToken.is(AsmToken::LBrac))
2881 return true;
2882 }
2883 }
2884
2885 return getSpecialRegForName(Str).isValid();
2886}
2887
2888bool
2889AMDGPUAsmParser::isRegister()
2890{
2891 return isRegister(getToken(), peekToken());
2892}
2893
2894MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2895 unsigned SubReg, unsigned RegWidth,
2896 SMLoc Loc) {
2897 assert(isRegularReg(RegKind));
2898
2899 unsigned AlignSize = 1;
2900 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2901 // SGPR and TTMP registers must be aligned.
2902 // Max required alignment is 4 dwords.
2903 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2904 }
2905
2906 if (RegNum % AlignSize != 0) {
2907 Error(Loc, "invalid register alignment");
2908 return MCRegister();
2909 }
2910
2911 unsigned RegIdx = RegNum / AlignSize;
2912 int RCID = getRegClass(RegKind, RegWidth);
2913 if (RCID == -1) {
2914 Error(Loc, "invalid or unsupported register size");
2915 return MCRegister();
2916 }
2917
2918 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2919 const MCRegisterClass RC = TRI->getRegClass(RCID);
2920 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2921 Error(Loc, "register index is out of range");
2922 return AMDGPU::NoRegister;
2923 }
2924
2925 if (RegKind == IS_VGPR && !isGFX1250() && RegIdx + RegWidth / 32 > 256) {
2926 Error(Loc, "register index is out of range");
2927 return MCRegister();
2928 }
2929
2930 MCRegister Reg = RC.getRegister(RegIdx);
2931
2932 if (SubReg) {
2933 Reg = TRI->getSubReg(Reg, SubReg);
2934
2935 // Currently all regular registers have their .l and .h subregisters, so
2936 // we should never need to generate an error here.
2937 assert(Reg && "Invalid subregister!");
2938 }
2939
2940 return Reg;
2941}
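// A hedged sketch of the SGPR/TTMP alignment rule applied above: a tuple of
// N dwords must start at an index aligned to min(bit_ceil(N), 4). The helper
// name is hypothetical.
static bool sgprTupleIsAligned(unsigned FirstIdx, unsigned RegWidthBits) {
  unsigned Align = std::min(llvm::bit_ceil(RegWidthBits / 32), 4u);
  return FirstIdx % Align == 0;
}
// e.g. s[2:3] is accepted (a 2-dword tuple needs 2-dword alignment), s[1:2]
// is not; s[4:11] is accepted because the required alignment is capped at 4.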
2942
2943bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2944 unsigned &SubReg) {
2945 int64_t RegLo, RegHi;
2946 if (!skipToken(AsmToken::LBrac, "missing register index"))
2947 return false;
2948
2949 SMLoc FirstIdxLoc = getLoc();
2950 SMLoc SecondIdxLoc;
2951
2952 if (!parseExpr(RegLo))
2953 return false;
2954
2955 if (trySkipToken(AsmToken::Colon)) {
2956 SecondIdxLoc = getLoc();
2957 if (!parseExpr(RegHi))
2958 return false;
2959 } else {
2960 RegHi = RegLo;
2961 }
2962
2963 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2964 return false;
2965
2966 if (!isUInt<32>(RegLo)) {
2967 Error(FirstIdxLoc, "invalid register index");
2968 return false;
2969 }
2970
2971 if (!isUInt<32>(RegHi)) {
2972 Error(SecondIdxLoc, "invalid register index");
2973 return false;
2974 }
2975
2976 if (RegLo > RegHi) {
2977 Error(FirstIdxLoc, "first register index should not exceed second index");
2978 return false;
2979 }
2980
2981 if (RegHi == RegLo) {
2982 StringRef RegSuffix = getTokenStr();
2983 if (RegSuffix == ".l") {
2984 SubReg = AMDGPU::lo16;
2985 lex();
2986 } else if (RegSuffix == ".h") {
2987 SubReg = AMDGPU::hi16;
2988 lex();
2989 }
2990 }
2991
2992 Num = static_cast<unsigned>(RegLo);
2993 RegWidth = 32 * ((RegHi - RegLo) + 1);
2994
2995 return true;
2996}
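// Worked example for the computation above (illustrative): "v[0:3]" yields
// RegLo = 0 and RegHi = 3, so Num = 0 and RegWidth = 32 * ((3 - 0) + 1) = 128,
// i.e. a 4-dword VGPR tuple. The width rule as a one-liner:
static unsigned rangeWidthInBits(unsigned RegLo, unsigned RegHi) {
  return 32 * (RegHi - RegLo + 1);
}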
2997
2998MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2999 unsigned &RegNum,
3000 unsigned &RegWidth,
3001 SmallVectorImpl<AsmToken> &Tokens) {
3002 assert(isToken(AsmToken::Identifier));
3003 MCRegister Reg = getSpecialRegForName(getTokenStr());
3004 if (Reg) {
3005 RegNum = 0;
3006 RegWidth = 32;
3007 RegKind = IS_SPECIAL;
3008 Tokens.push_back(getToken());
3009 lex(); // skip register name
3010 }
3011 return Reg;
3012}
3013
3014MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3015 unsigned &RegNum,
3016 unsigned &RegWidth,
3017 SmallVectorImpl<AsmToken> &Tokens) {
3018 assert(isToken(AsmToken::Identifier));
3019 StringRef RegName = getTokenStr();
3020 auto Loc = getLoc();
3021
3022 const RegInfo *RI = getRegularRegInfo(RegName);
3023 if (!RI) {
3024 Error(Loc, "invalid register name");
3025 return MCRegister();
3026 }
3027
3028 Tokens.push_back(getToken());
3029 lex(); // skip register name
3030
3031 RegKind = RI->Kind;
3032 StringRef RegSuffix = RegName.substr(RI->Name.size());
3033 unsigned SubReg = NoSubRegister;
3034 if (!RegSuffix.empty()) {
3035 if (RegSuffix.consume_back(".l"))
3036 SubReg = AMDGPU::lo16;
3037 else if (RegSuffix.consume_back(".h"))
3038 SubReg = AMDGPU::hi16;
3039
3040 // Single 32-bit register: vXX.
3041 if (!getRegNum(RegSuffix, RegNum)) {
3042 Error(Loc, "invalid register index");
3043 return MCRegister();
3044 }
3045 RegWidth = 32;
3046 } else {
3047 // Range of registers: v[XX:YY]. ":YY" is optional.
3048 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3049 return MCRegister();
3050 }
3051
3052 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3053}
3054
3055MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3056 unsigned &RegNum, unsigned &RegWidth,
3057 SmallVectorImpl<AsmToken> &Tokens) {
3058 MCRegister Reg;
3059 auto ListLoc = getLoc();
3060
3061 if (!skipToken(AsmToken::LBrac,
3062 "expected a register or a list of registers")) {
3063 return MCRegister();
3064 }
3065
3066 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3067
3068 auto Loc = getLoc();
3069 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3070 return MCRegister();
3071 if (RegWidth != 32) {
3072 Error(Loc, "expected a single 32-bit register");
3073 return MCRegister();
3074 }
3075
3076 for (; trySkipToken(AsmToken::Comma); ) {
3077 RegisterKind NextRegKind;
3078 MCRegister NextReg;
3079 unsigned NextRegNum, NextRegWidth;
3080 Loc = getLoc();
3081
3082 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3083 NextRegNum, NextRegWidth,
3084 Tokens)) {
3085 return MCRegister();
3086 }
3087 if (NextRegWidth != 32) {
3088 Error(Loc, "expected a single 32-bit register");
3089 return MCRegister();
3090 }
3091 if (NextRegKind != RegKind) {
3092 Error(Loc, "registers in a list must be of the same kind");
3093 return MCRegister();
3094 }
3095 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3096 return MCRegister();
3097 }
3098
3099 if (!skipToken(AsmToken::RBrac,
3100 "expected a comma or a closing square bracket")) {
3101 return MCRegister();
3102 }
3103
3104 if (isRegularReg(RegKind))
3105 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3106
3107 return Reg;
3108}
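// Worked example for the list syntax above (illustrative): "[s0,s1,s2,s3]"
// accepts four consecutive 32-bit SGPRs, growing RegWidth to 128, and the
// group is then re-resolved through getRegularReg into the single aligned
// tuple s[0:3].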
3109
3110bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3111 MCRegister &Reg, unsigned &RegNum,
3112 unsigned &RegWidth,
3113 SmallVectorImpl<AsmToken> &Tokens) {
3114 auto Loc = getLoc();
3115 Reg = MCRegister();
3116
3117 if (isToken(AsmToken::Identifier)) {
3118 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3119 if (!Reg)
3120 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3121 } else {
3122 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3123 }
3124
3125 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3126 if (!Reg) {
3127 assert(Parser.hasPendingError());
3128 return false;
3129 }
3130
3131 if (!subtargetHasRegister(*TRI, Reg)) {
3132 if (Reg == AMDGPU::SGPR_NULL) {
3133 Error(Loc, "'null' operand is not supported on this GPU");
3134 } else {
3136 " register not available on this GPU");
3137 }
3138 return false;
3139 }
3140
3141 return true;
3142}
3143
3144bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3145 MCRegister &Reg, unsigned &RegNum,
3146 unsigned &RegWidth,
3147 bool RestoreOnFailure /*=false*/) {
3148 Reg = MCRegister();
3149
3151 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3152 if (RestoreOnFailure) {
3153 while (!Tokens.empty()) {
3154 getLexer().UnLex(Tokens.pop_back_val());
3155 }
3156 }
3157 return true;
3158 }
3159 return false;
3160}
3161
3162std::optional<StringRef>
3163AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3164 switch (RegKind) {
3165 case IS_VGPR:
3166 return StringRef(".amdgcn.next_free_vgpr");
3167 case IS_SGPR:
3168 return StringRef(".amdgcn.next_free_sgpr");
3169 default:
3170 return std::nullopt;
3171 }
3172}
3173
3174void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3175 auto SymbolName = getGprCountSymbolName(RegKind);
3176 assert(SymbolName && "initializing invalid register kind");
3177 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3179 Sym->setRedefinable(true);
3180}
3181
3182bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3183 unsigned DwordRegIndex,
3184 unsigned RegWidth) {
3185 // Symbols are only defined for GCN targets
3186 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3187 return true;
3188
3189 auto SymbolName = getGprCountSymbolName(RegKind);
3190 if (!SymbolName)
3191 return true;
3192 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3193
3194 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3195 int64_t OldCount;
3196
3197 if (!Sym->isVariable())
3198 return !Error(getLoc(),
3199 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3200 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3201 return !Error(
3202 getLoc(),
3203 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3204
3205 if (OldCount <= NewMax)
3207
3208 return true;
3209}
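// Hedged sketch of the bookkeeping above: the .amdgcn.next_free_{v,s}gpr
// symbols track one past the highest register index referenced so far, so a
// use of v[6:7] (DwordRegIndex = 6, RegWidth = 64) raises the count to at
// least 8. The helper below is illustrative only.
static int64_t nextFreeGprSketch(int64_t OldCount, unsigned DwordRegIndex,
                                 unsigned RegWidthBits) {
  int64_t LastIndexUsed = DwordRegIndex + (RegWidthBits + 31) / 32 - 1;
  return std::max<int64_t>(OldCount, LastIndexUsed + 1);
}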
3210
3211std::unique_ptr<AMDGPUOperand>
3212AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3213 const auto &Tok = getToken();
3214 SMLoc StartLoc = Tok.getLoc();
3215 SMLoc EndLoc = Tok.getEndLoc();
3216 RegisterKind RegKind;
3217 MCRegister Reg;
3218 unsigned RegNum, RegWidth;
3219
3220 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3221 return nullptr;
3222 }
3223 if (isHsaAbi(getSTI())) {
3224 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3225 return nullptr;
3226 } else
3227 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3228 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3229}
3230
3231ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3232 bool HasSP3AbsModifier, LitModifier Lit) {
3233 // TODO: add syntactic sugar for 1/(2*PI)
3234
3235 if (isRegister() || isModifier())
3236 return ParseStatus::NoMatch;
3237
3238 if (Lit == LitModifier::None) {
3239 if (trySkipId("lit"))
3240 Lit = LitModifier::Lit;
3241 else if (trySkipId("lit64"))
3242 Lit = LitModifier::Lit64;
3243
3244 if (Lit != LitModifier::None) {
3245 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3246 return ParseStatus::Failure;
3247 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3248 if (S.isSuccess() &&
3249 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3250 return ParseStatus::Failure;
3251 return S;
3252 }
3253 }
3254
3255 const auto& Tok = getToken();
3256 const auto& NextTok = peekToken();
3257 bool IsReal = Tok.is(AsmToken::Real);
3258 SMLoc S = getLoc();
3259 bool Negate = false;
3260
3261 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3262 lex();
3263 IsReal = true;
3264 Negate = true;
3265 }
3266
3267 AMDGPUOperand::Modifiers Mods;
3268 Mods.Lit = Lit;
3269
3270 if (IsReal) {
3271     // Floating-point expressions are not supported.
3272     // Only floating-point literals with an optional
3273     // sign are allowed.
3274
3275 StringRef Num = getTokenStr();
3276 lex();
3277
3278 APFloat RealVal(APFloat::IEEEdouble());
3279 auto roundMode = APFloat::rmNearestTiesToEven;
3280 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3281 return ParseStatus::Failure;
3282 if (Negate)
3283 RealVal.changeSign();
3284
3285 Operands.push_back(
3286 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3287 AMDGPUOperand::ImmTyNone, true));
3288 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3289 Op.setModifiers(Mods);
3290
3291 return ParseStatus::Success;
3292
3293 } else {
3294 int64_t IntVal;
3295 const MCExpr *Expr;
3296 SMLoc S = getLoc();
3297
3298 if (HasSP3AbsModifier) {
3299 // This is a workaround for handling expressions
3300 // as arguments of SP3 'abs' modifier, for example:
3301 // |1.0|
3302 // |-1|
3303 // |1+x|
3304 // This syntax is not compatible with syntax of standard
3305 // MC expressions (due to the trailing '|').
3306 SMLoc EndLoc;
3307 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3308 return ParseStatus::Failure;
3309 } else {
3310 if (Parser.parseExpression(Expr))
3311 return ParseStatus::Failure;
3312 }
3313
3314 if (Expr->evaluateAsAbsolute(IntVal)) {
3315 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3316 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3317 Op.setModifiers(Mods);
3318 } else {
3319 if (Lit != LitModifier::None)
3320 return ParseStatus::NoMatch;
3321 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3322 }
3323
3324 return ParseStatus::Success;
3325 }
3326
3327 return ParseStatus::NoMatch;
3328}
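// Examples of the literal-forcing syntax handled above (illustrative; whether
// a given instruction accepts the literal depends on the opcode and
// subtarget):
//   v_add_f32 v0, lit(1.0), v1             // force a 32-bit literal encoding
//   v_add_f64 v[0:1], lit64(4.0), v[2:3]   // force a 64-bit literal encoding
// Without lit()/lit64() the assembler prefers an inline constant whenever the
// value allows it.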
3329
3330ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3331 if (!isRegister())
3332 return ParseStatus::NoMatch;
3333
3334 if (auto R = parseRegister()) {
3335 assert(R->isReg());
3336 Operands.push_back(std::move(R));
3337 return ParseStatus::Success;
3338 }
3339 return ParseStatus::Failure;
3340}
3341
3342ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3343 bool HasSP3AbsMod, LitModifier Lit) {
3344 ParseStatus Res = parseReg(Operands);
3345 if (!Res.isNoMatch())
3346 return Res;
3347 if (isModifier())
3348 return ParseStatus::NoMatch;
3349 return parseImm(Operands, HasSP3AbsMod, Lit);
3350}
3351
3352bool
3353AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3354 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3355 const auto &str = Token.getString();
3356 return str == "abs" || str == "neg" || str == "sext";
3357 }
3358 return false;
3359}
3360
3361bool
3362AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3363 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3364}
3365
3366bool
3367AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3368 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3369}
3370
3371bool
3372AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3373 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3374}
3375
3376 // Check if this is an operand modifier or an opcode modifier
3377 // which may look like an expression but is not. We should
3378 // avoid parsing these modifiers as expressions. Currently
3379 // recognized sequences are:
3380// |...|
3381// abs(...)
3382// neg(...)
3383// sext(...)
3384// -reg
3385// -|...|
3386// -abs(...)
3387// name:...
3388//
3389bool
3390AMDGPUAsmParser::isModifier() {
3391
3392 AsmToken Tok = getToken();
3393 AsmToken NextToken[2];
3394 peekTokens(NextToken);
3395
3396 return isOperandModifier(Tok, NextToken[0]) ||
3397 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3398 isOpcodeModifierWithVal(Tok, NextToken[0]);
3399}
3400
3401// Check if the current token is an SP3 'neg' modifier.
3402// Currently this modifier is allowed in the following context:
3403//
3404// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3405// 2. Before an 'abs' modifier: -abs(...)
3406// 3. Before an SP3 'abs' modifier: -|...|
3407//
3408// In all other cases "-" is handled as a part
3409// of an expression that follows the sign.
3410//
3411 // Note: When "-" is followed by an integer literal N,
3412 // this is interpreted as integer negation rather
3413 // than a floating-point NEG modifier applied to N.
3414 // Besides being counter-intuitive, such use of the floating-point
3415 // NEG modifier would have resulted in different meanings
3416 // of integer literals used with VOP1/2/C and VOP3,
3417// for example:
3418// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3419// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3420// Negative fp literals with preceding "-" are
3421// handled likewise for uniformity
3422//
3423bool
3424AMDGPUAsmParser::parseSP3NegModifier() {
3425
3426 AsmToken NextToken[2];
3427 peekTokens(NextToken);
3428
3429 if (isToken(AsmToken::Minus) &&
3430 (isRegister(NextToken[0], NextToken[1]) ||
3431 NextToken[0].is(AsmToken::Pipe) ||
3432 isId(NextToken[0], "abs"))) {
3433 lex();
3434 return true;
3435 }
3436
3437 return false;
3438}
3439
3440ParseStatus
3441AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3442 bool AllowImm) {
3443 bool Neg, SP3Neg;
3444 bool Abs, SP3Abs;
3445 SMLoc Loc;
3446
3447 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3448 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3449 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3450
3451 SP3Neg = parseSP3NegModifier();
3452
3453 Loc = getLoc();
3454 Neg = trySkipId("neg");
3455 if (Neg && SP3Neg)
3456 return Error(Loc, "expected register or immediate");
3457 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3458 return ParseStatus::Failure;
3459
3460 Abs = trySkipId("abs");
3461 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3462 return ParseStatus::Failure;
3463
3464 LitModifier Lit = LitModifier::None;
3465 if (trySkipId("lit")) {
3466 Lit = LitModifier::Lit;
3467 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3468 return ParseStatus::Failure;
3469 } else if (trySkipId("lit64")) {
3470 Lit = LitModifier::Lit64;
3471 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3472 return ParseStatus::Failure;
3473 if (!has64BitLiterals())
3474 return Error(Loc, "lit64 is not supported on this GPU");
3475 }
3476
3477 Loc = getLoc();
3478 SP3Abs = trySkipToken(AsmToken::Pipe);
3479 if (Abs && SP3Abs)
3480 return Error(Loc, "expected register or immediate");
3481
3482 ParseStatus Res;
3483 if (AllowImm) {
3484 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3485 } else {
3486 Res = parseReg(Operands);
3487 }
3488 if (!Res.isSuccess())
3489 return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
3491 : Res;
3492
3493 if (Lit != LitModifier::None && !Operands.back()->isImm())
3494 Error(Loc, "expected immediate with lit modifier");
3495
3496 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3497 return ParseStatus::Failure;
3498 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3499 return ParseStatus::Failure;
3500 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3501 return ParseStatus::Failure;
3502 if (Lit != LitModifier::None &&
3503 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3504 return ParseStatus::Failure;
3505
3506 AMDGPUOperand::Modifiers Mods;
3507 Mods.Abs = Abs || SP3Abs;
3508 Mods.Neg = Neg || SP3Neg;
3509 Mods.Lit = Lit;
3510
3511 if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
3512 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3513 if (Op.isExpr())
3514 return Error(Op.getStartLoc(), "expected an absolute expression");
3515 Op.setModifiers(Mods);
3516 }
3517 return ParseStatus::Success;
3518}
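// Operand spellings accepted by the routine above (illustrative): the named
// forms abs(v0) and neg(v1), the SP3 forms -v2, |v3| and -|v4|, the
// combination -abs(v5), and the literal-forcing wrappers lit(...) and
// lit64(...). Ambiguous forms such as "--1" are rejected with
// "invalid syntax, expected 'neg' modifier".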
3519
3520ParseStatus
3521AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3522 bool AllowImm) {
3523 bool Sext = trySkipId("sext");
3524 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3525 return ParseStatus::Failure;
3526
3527 ParseStatus Res;
3528 if (AllowImm) {
3529 Res = parseRegOrImm(Operands);
3530 } else {
3531 Res = parseReg(Operands);
3532 }
3533 if (!Res.isSuccess())
3534 return Sext ? ParseStatus::Failure : Res;
3535
3536 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3537 return ParseStatus::Failure;
3538
3539 AMDGPUOperand::Modifiers Mods;
3540 Mods.Sext = Sext;
3541
3542 if (Mods.hasIntModifiers()) {
3543 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3544 if (Op.isExpr())
3545 return Error(Op.getStartLoc(), "expected an absolute expression");
3546 Op.setModifiers(Mods);
3547 }
3548
3549 return ParseStatus::Success;
3550}
3551
3552ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3553 return parseRegOrImmWithFPInputMods(Operands, false);
3554}
3555
3556ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3557 return parseRegOrImmWithIntInputMods(Operands, false);
3558}
3559
3560ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3561 auto Loc = getLoc();
3562 if (trySkipId("off")) {
3563 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3564 AMDGPUOperand::ImmTyOff, false));
3565 return ParseStatus::Success;
3566 }
3567
3568 if (!isRegister())
3569 return ParseStatus::NoMatch;
3570
3571 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3572 if (Reg) {
3573 Operands.push_back(std::move(Reg));
3574 return ParseStatus::Success;
3575 }
3576
3577 return ParseStatus::Failure;
3578}
3579
3580unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3581 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3582
3583 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3584 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3585 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3586 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3587 return Match_InvalidOperand;
3588
3589 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3590 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3591 // v_mac_f32/16 allow only dst_sel == DWORD;
3592 auto OpNum =
3593 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3594 const auto &Op = Inst.getOperand(OpNum);
3595 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3596 return Match_InvalidOperand;
3597 }
3598 }
3599
3600 // Asm can first try to match VOPD or VOPD3. By failing early here with
3601 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3602 // Checking later during validateInstruction does not give a chance to retry
3603 // parsing as a different encoding.
3604 if (tryAnotherVOPDEncoding(Inst))
3605 return Match_InvalidOperand;
3606
3607 return Match_Success;
3608}
3609
3610 static ArrayRef<unsigned> getAllVariants() {
3611 static const unsigned Variants[] = {
3612 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3613 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3614 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3615 };
3616
3617 return ArrayRef(Variants);
3618 }
3619
3620// What asm variants we should check
3621ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3622 if (isForcedDPP() && isForcedVOP3()) {
3623 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3624 return ArrayRef(Variants);
3625 }
3626 if (getForcedEncodingSize() == 32) {
3627 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3628 return ArrayRef(Variants);
3629 }
3630
3631 if (isForcedVOP3()) {
3632 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3633 return ArrayRef(Variants);
3634 }
3635
3636 if (isForcedSDWA()) {
3637 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3638 AMDGPUAsmVariants::SDWA9};
3639 return ArrayRef(Variants);
3640 }
3641
3642 if (isForcedDPP()) {
3643 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3644 return ArrayRef(Variants);
3645 }
3646
3647 return getAllVariants();
3648}
3649
3650StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3651 if (isForcedDPP() && isForcedVOP3())
3652 return "e64_dpp";
3653
3654 if (getForcedEncodingSize() == 32)
3655 return "e32";
3656
3657 if (isForcedVOP3())
3658 return "e64";
3659
3660 if (isForcedSDWA())
3661 return "sdwa";
3662
3663 if (isForcedDPP())
3664 return "dpp";
3665
3666 return "";
3667}
3668
3669unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3670 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3671 for (MCPhysReg Reg : Desc.implicit_uses()) {
3672 switch (Reg) {
3673 case AMDGPU::FLAT_SCR:
3674 case AMDGPU::VCC:
3675 case AMDGPU::VCC_LO:
3676 case AMDGPU::VCC_HI:
3677 case AMDGPU::M0:
3678 return Reg;
3679 default:
3680 break;
3681 }
3682 }
3683 return AMDGPU::NoRegister;
3684}
3685
3686// NB: This code is correct only when used to check constant
3687 // bus limitations because GFX7 supports no f16 inline constants.
3688// Note that there are no cases when a GFX7 opcode violates
3689// constant bus limitations due to the use of an f16 constant.
3690bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3691 unsigned OpIdx) const {
3692 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3693
3694 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3695 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3696 return false;
3697 }
3698
3699 const MCOperand &MO = Inst.getOperand(OpIdx);
3700
3701 int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
3702 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3703
3704 switch (OpSize) { // expected operand size
3705 case 8:
3706 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3707 case 4:
3708 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3709 case 2: {
3710 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3713 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3714
3718
3722
3726
3729 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3730
3733 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3734
3736 return false;
3737
3738 llvm_unreachable("invalid operand type");
3739 }
3740 default:
3741 llvm_unreachable("invalid operand size");
3742 }
3743}
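The 4-byte branch above defers to AMDGPU::isInlinableLiteral32. As a rough standalone illustration of that rule (not part of the parser; isInlinable32Sketch is a hypothetical helper and the f32 bit patterns assume IEEE-754 encoding): integers in [-16, 64] inline directly, a small set of FP constants inline by bit pattern, and 1/(2*pi) is accepted only when the subtarget provides that inline constant.

#include <cstdint>

// Hypothetical sketch of the 32-bit inline-constant rule used above.
static bool isInlinable32Sketch(int64_t Val, bool HasInv2Pi) {
  if (Val >= -16 && Val <= 64)
    return true;                        // integer inline constants
  switch (static_cast<uint32_t>(Val)) { // f32 bit patterns
  case 0x3f000000: case 0xbf000000:     // +/-0.5
  case 0x3f800000: case 0xbf800000:     // +/-1.0
  case 0x40000000: case 0xc0000000:     // +/-2.0
  case 0x40800000: case 0xc0800000:     // +/-4.0
    return true;
  case 0x3e22f983:                      // 1/(2*pi)
    return HasInv2Pi;
  default:
    return false;
  }
}

For example, isInlinable32Sketch(64, false) holds while isInlinable32Sketch(65, false) does not, which is why a value of 65 consumes a literal slot on the constant bus.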
3744
3745unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3746 if (!isGFX10Plus())
3747 return 1;
3748
3749 switch (Opcode) {
3750 // 64-bit shift instructions can use only one scalar value input
3751 case AMDGPU::V_LSHLREV_B64_e64:
3752 case AMDGPU::V_LSHLREV_B64_gfx10:
3753 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3754 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3755 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3756 case AMDGPU::V_LSHRREV_B64_e64:
3757 case AMDGPU::V_LSHRREV_B64_gfx10:
3758 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3759 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3760 case AMDGPU::V_ASHRREV_I64_e64:
3761 case AMDGPU::V_ASHRREV_I64_gfx10:
3762 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3763 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3764 case AMDGPU::V_LSHL_B64_e64:
3765 case AMDGPU::V_LSHR_B64_e64:
3766 case AMDGPU::V_ASHR_I64_e64:
3767 return 1;
3768 default:
3769 return 2;
3770 }
3771}
3772
3773constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3774 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3775
3776// Get regular operand indices in the same order as specified
3777// in the instruction (but append mandatory literals to the end).
3778 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3779 bool AddMandatoryLiterals = false) {
3780
3781 int16_t ImmIdx =
3782 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3783
3784 if (isVOPD(Opcode)) {
3785 int16_t ImmXIdx =
3786 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3787
3788 return {getNamedOperandIdx(Opcode, OpName::src0X),
3789 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3790 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3791 getNamedOperandIdx(Opcode, OpName::src0Y),
3792 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3793 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3794 ImmXIdx,
3795 ImmIdx};
3796 }
3797
3798 return {getNamedOperandIdx(Opcode, OpName::src0),
3799 getNamedOperandIdx(Opcode, OpName::src1),
3800 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3801}
3802
3803bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3804 const MCOperand &MO = Inst.getOperand(OpIdx);
3805 if (MO.isImm())
3806 return !isInlineConstant(Inst, OpIdx);
3807 if (MO.isReg()) {
3808 auto Reg = MO.getReg();
3809 if (!Reg)
3810 return false;
3811 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3812 auto PReg = mc2PseudoReg(Reg);
3813 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3814 }
3815 return true;
3816}
3817
3818// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3819// Writelane is special in that it can use SGPR and M0 (which would normally
3820// count as using the constant bus twice - but in this case it is allowed since
3821// the lane selector doesn't count as a use of the constant bus). However, it is
3822// still required to abide by the 1 SGPR rule.
3823static bool checkWriteLane(const MCInst &Inst) {
3824 const unsigned Opcode = Inst.getOpcode();
3825 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3826 return false;
3827 const MCOperand &LaneSelOp = Inst.getOperand(2);
3828 if (!LaneSelOp.isReg())
3829 return false;
3830 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3831 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3832}
3833
3834bool AMDGPUAsmParser::validateConstantBusLimitations(
3835 const MCInst &Inst, const OperandVector &Operands) {
3836 const unsigned Opcode = Inst.getOpcode();
3837 const MCInstrDesc &Desc = MII.get(Opcode);
3838 MCRegister LastSGPR;
3839 unsigned ConstantBusUseCount = 0;
3840 unsigned NumLiterals = 0;
3841 unsigned LiteralSize;
3842
3843 if (!(Desc.TSFlags &
3844 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3845 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3846 !isVOPD(Opcode))
3847 return true;
3848
3849 if (checkWriteLane(Inst))
3850 return true;
3851
3852 // Check special imm operands (used by madmk, etc)
3853 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3854 ++NumLiterals;
3855 LiteralSize = 4;
3856 }
3857
3858 SmallDenseSet<unsigned> SGPRsUsed;
3859 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3860 if (SGPRUsed != AMDGPU::NoRegister) {
3861 SGPRsUsed.insert(SGPRUsed);
3862 ++ConstantBusUseCount;
3863 }
3864
3865 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3866
3867 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3868
3869 for (int OpIdx : OpIndices) {
3870 if (OpIdx == -1)
3871 continue;
3872
3873 const MCOperand &MO = Inst.getOperand(OpIdx);
3874 if (usesConstantBus(Inst, OpIdx)) {
3875 if (MO.isReg()) {
3876 LastSGPR = mc2PseudoReg(MO.getReg());
3877 // Pairs of registers with partial intersections like these
3878 // s0, s[0:1]
3879 // flat_scratch_lo, flat_scratch
3880 // flat_scratch_lo, flat_scratch_hi
3881 // are theoretically valid but they are disabled anyway.
3882 // Note that this code mimics SIInstrInfo::verifyInstruction
3883 if (SGPRsUsed.insert(LastSGPR).second) {
3884 ++ConstantBusUseCount;
3885 }
3886 } else { // Expression or a literal
3887
3888 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3889 continue; // special operand like VINTERP attr_chan
3890
3891 // An instruction may use only one literal.
3892 // This has been validated on the previous step.
3893 // See validateVOPLiteral.
3894 // This literal may be used as more than one operand.
3895 // If all these operands are of the same size,
3896 // this literal counts as one scalar value.
3897 // Otherwise it counts as 2 scalar values.
3898 // See "GFX10 Shader Programming", section 3.6.2.3.
3899
3900 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3901 if (Size < 4)
3902 Size = 4;
3903
3904 if (NumLiterals == 0) {
3905 NumLiterals = 1;
3906 LiteralSize = Size;
3907 } else if (LiteralSize != Size) {
3908 NumLiterals = 2;
3909 }
3910 }
3911 }
3912
3913 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
3914 Error(getOperandLoc(Operands, OpIdx),
3915 "invalid operand (violates constant bus restrictions)");
3916 return false;
3917 }
3918 }
3919 return true;
3920}
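A worked example of the counting above, assuming the usual e64 encodings: on a pre-GFX10 target getConstantBusLimit returns 1, so v_add_f32_e64 v0, s0, s1 (two distinct SGPR sources) is rejected with the diagnostic above, while on GFX10+ the same instruction fits the limit of 2. A 32-bit literal counts as one scalar value, and per the comment above a literal reused by operands of different sizes counts as two.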
3921
3922std::optional<unsigned>
3923AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
3924
3925 const unsigned Opcode = Inst.getOpcode();
3926 if (!isVOPD(Opcode))
3927 return {};
3928
3929 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3930
3931 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3932 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3933 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3934 ? Opr.getReg()
3935 : MCRegister();
3936 };
3937
3938 // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
3939 // source-cache.
3940 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3941 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3942 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
3943 bool AllowSameVGPR = isGFX1250();
3944
3945 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3946 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3947 int I = getNamedOperandIdx(Opcode, OpName);
3948 const MCOperand &Op = Inst.getOperand(I);
3949 if (!Op.isImm())
3950 continue;
3951 int64_t Imm = Op.getImm();
3952 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
3953 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
3954 return (unsigned)I;
3955 }
3956
3957 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3958 OpName::vsrc2Y, OpName::imm}) {
3959 int I = getNamedOperandIdx(Opcode, OpName);
3960 if (I == -1)
3961 continue;
3962 const MCOperand &Op = Inst.getOperand(I);
3963 if (Op.isImm())
3964 return (unsigned)I;
3965 }
3966 }
3967
3968 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3969 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
3970 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
3971
3972 return InvalidCompOprIdx;
3973}
3974
3975bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
3976 const OperandVector &Operands) {
3977
3978 unsigned Opcode = Inst.getOpcode();
3979 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
3980
3981 if (AsVOPD3) {
3982 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
3983 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
3984 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
3985 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
3986 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
3987 }
3988 }
3989
3990 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
3991 if (!InvalidCompOprIdx.has_value())
3992 return true;
3993
3994 auto CompOprIdx = *InvalidCompOprIdx;
3995 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3996 auto ParsedIdx =
3997 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3998 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3999 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4000
4001 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4002 if (CompOprIdx == VOPD::Component::DST) {
4003 if (AsVOPD3)
4004 Error(Loc, "dst registers must be distinct");
4005 else
4006 Error(Loc, "one dst register must be even and the other odd");
4007 } else {
4008 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4009 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4010 " operands must use different VGPR banks");
4011 }
4012
4013 return false;
4014}
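For illustration, assuming the GFX11-style VOPD banking rules reflected in the diagnostics above: a pair such as v_dual_add_f32 v0, v1, v2 :: v_dual_mul_f32 v3, v4, v5 passes these checks (the dsts v0/v3 differ in parity, and the paired src0 and vsrc1 operands fall into different VGPR banks), whereas giving both halves the same dst parity or the same src0 bank produces the errors emitted here.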
4015
4016// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4017// potentially used as VOPD3 with the same operands.
4018bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4019 // First check if it fits VOPD
4020 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4021 if (!InvalidCompOprIdx.has_value())
4022 return false;
4023
4024 // Then if it fits VOPD3
4025 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4026 if (InvalidCompOprIdx.has_value()) {
4027 // If failed operand is dst it is better to show error about VOPD3
4028 // instruction as it has more capabilities and error message will be
4029 // more informative. If the dst is not legal for VOPD3, then it is not
4030 // legal for VOPD either.
4031 if (*InvalidCompOprIdx == VOPD::Component::DST)
4032 return true;
4033
4034 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4035 // with a conflict in tied implicit src2 of fmac and no asm operand to
4036 // point to.
4037 return false;
4038 }
4039 return true;
4040}
4041
4042 // \returns true if a VOPD3 instruction can also be represented as a shorter
4043// VOPD encoding.
4044bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4045 const unsigned Opcode = Inst.getOpcode();
4046 const auto &II = getVOPDInstInfo(Opcode, &MII);
4047 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4048 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4049 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4050 return false;
4051
4052 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4053 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4054 // be parsed as VOPD which does not accept src2.
4055 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4056 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4057 return false;
4058
4059 // If any modifiers are set this cannot be VOPD.
4060 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4061 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4062 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4063 int I = getNamedOperandIdx(Opcode, OpName);
4064 if (I == -1)
4065 continue;
4066 if (Inst.getOperand(I).getImm())
4067 return false;
4068 }
4069
4070 return !tryVOPD3(Inst);
4071}
4072
4073 // VOPD3 has more relaxed register constraints than VOPD. We prefer the
4074 // shorter VOPD form but switch to VOPD3 otherwise.
4075bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4076 const unsigned Opcode = Inst.getOpcode();
4077 if (!isGFX1250() || !isVOPD(Opcode))
4078 return false;
4079
4080 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4081 return tryVOPD(Inst);
4082 return tryVOPD3(Inst);
4083}
4084
4085bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4086
4087 const unsigned Opc = Inst.getOpcode();
4088 const MCInstrDesc &Desc = MII.get(Opc);
4089
4090 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4091 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4092 assert(ClampIdx != -1);
4093 return Inst.getOperand(ClampIdx).getImm() == 0;
4094 }
4095
4096 return true;
4097}
4098
4099 constexpr uint64_t MIMGFlags =
4100 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
4101
4102bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
4103
4104 const unsigned Opc = Inst.getOpcode();
4105 const MCInstrDesc &Desc = MII.get(Opc);
4106
4107 if ((Desc.TSFlags & MIMGFlags) == 0)
4108 return true;
4109
4110 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4111 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4112 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4113
4114 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4115 return true;
4116
4117 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4118 return true;
4119
4120 unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
4121 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4122 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4123 if (DMask == 0)
4124 DMask = 1;
4125
4126 bool IsPackedD16 = false;
4127 unsigned DataSize =
4128 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4129 if (hasPackedD16()) {
4130 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4131 IsPackedD16 = D16Idx >= 0;
4132 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4133 DataSize = (DataSize + 1) / 2;
4134 }
4135
4136 if ((VDataSize / 4) == DataSize + TFESize)
4137 return true;
4138
4139 StringRef Modifiers;
4140 if (isGFX90A())
4141 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4142 else
4143 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4144
4145 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4146 return false;
4147}
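The dword arithmetic above can be summarized in a small standalone sketch (illustration only; requiredVDataDwords is a hypothetical helper, and the Gather4 case, which always uses four components, is omitted):

#include <bit>

// Required vdata dwords: popcount of dmask, halved for packed d16,
// plus one extra dword when tfe is set.
static unsigned requiredVDataDwords(unsigned DMask, bool TFE, bool PackedD16) {
  unsigned Data = std::popcount(DMask & 0xfu);
  if (Data == 0)
    Data = 1;                  // a dmask of 0 still transfers one component
  if (PackedD16)
    Data = (Data + 1) / 2;     // two 16-bit components share a dword
  return Data + (TFE ? 1u : 0u);
}

For example, dmask = 0x7 with tfe gives 3 + 1 = 4, so vdata must be a four-register tuple; with packed d16 the three components fit in two dwords, so three registers suffice including the tfe dword.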
4148
4149bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
4150 const unsigned Opc = Inst.getOpcode();
4151 const MCInstrDesc &Desc = MII.get(Opc);
4152
4153 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4154 return true;
4155
4156 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4157
4158 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4159 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4160 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4161 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4162 ? AMDGPU::OpName::srsrc
4163 : AMDGPU::OpName::rsrc;
4164 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4165 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4166 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4167
4168 assert(VAddr0Idx != -1);
4169 assert(SrsrcIdx != -1);
4170 assert(SrsrcIdx > VAddr0Idx);
4171
4172 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4173 if (BaseOpcode->BVH) {
4174 if (IsA16 == BaseOpcode->A16)
4175 return true;
4176 Error(IDLoc, "image address size does not match a16");
4177 return false;
4178 }
4179
4180 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4181 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4182 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4183 unsigned ActualAddrSize =
4184 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;
4185
4186 unsigned ExpectedAddrSize =
4187 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4188
4189 if (IsNSA) {
4190 if (hasPartialNSAEncoding() &&
4191 ExpectedAddrSize >
4193 int VAddrLastIdx = SrsrcIdx - 1;
4194 unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;
4195
4196 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4197 }
4198 } else {
4199 if (ExpectedAddrSize > 12)
4200 ExpectedAddrSize = 16;
4201
4202 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4203 // This provides backward compatibility for assembly created
4204 // before 160b/192b/224b types were directly supported.
4205 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4206 return true;
4207 }
4208
4209 if (ActualAddrSize == ExpectedAddrSize)
4210 return true;
4211
4212 Error(IDLoc, "image address size does not match dim and a16");
4213 return false;
4214}
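As a rough illustration of the expected address size: a 2D image_sample takes two 32-bit address components (s, t) and a 3D or cube operation takes three, with a16 packing two 16-bit components per VGPR; in the NSA case each component occupies its own vaddr operand, which is why ActualAddrSize is simply the operand count between vaddr0 and srsrc.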
4215
4216bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4217
4218 const unsigned Opc = Inst.getOpcode();
4219 const MCInstrDesc &Desc = MII.get(Opc);
4220
4221 if ((Desc.TSFlags & MIMGFlags) == 0)
4222 return true;
4223 if (!Desc.mayLoad() || !Desc.mayStore())
4224 return true; // Not atomic
4225
4226 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4227 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4228
4229 // This is an incomplete check because image_atomic_cmpswap
4230 // may only use 0x3 and 0xf while other atomic operations
4231 // may use 0x1 and 0x3. However, these limitations are
4232 // verified when we check that dmask matches dst size.
4233 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4234}
4235
4236bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4237
4238 const unsigned Opc = Inst.getOpcode();
4239 const MCInstrDesc &Desc = MII.get(Opc);
4240
4241 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4242 return true;
4243
4244 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4245 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4246
4247 // GATHER4 instructions use dmask in a different fashion compared to
4248 // other MIMG instructions. The only useful DMASK values are
4249 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4250 // (red,red,red,red) etc.) The ISA document doesn't mention
4251 // this.
4252 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4253}
4254
4255bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4256 const OperandVector &Operands) {
4257 if (!isGFX10Plus())
4258 return true;
4259
4260 const unsigned Opc = Inst.getOpcode();
4261 const MCInstrDesc &Desc = MII.get(Opc);
4262
4263 if ((Desc.TSFlags & MIMGFlags) == 0)
4264 return true;
4265
4266 // image_bvh_intersect_ray instructions do not have dim
4267 if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
4268 return true;
4269
4270 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4271 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4272 if (Op.isDim())
4273 return true;
4274 }
4275 return false;
4276}
4277
4278bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4279 const unsigned Opc = Inst.getOpcode();
4280 const MCInstrDesc &Desc = MII.get(Opc);
4281
4282 if ((Desc.TSFlags & MIMGFlags) == 0)
4283 return true;
4284
4285 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4286 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4287 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4288
4289 if (!BaseOpcode->MSAA)
4290 return true;
4291
4292 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4293 assert(DimIdx != -1);
4294
4295 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4296 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4297
4298 return DimInfo->MSAA;
4299}
4300
4301static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4302{
4303 switch (Opcode) {
4304 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4305 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4306 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4307 return true;
4308 default:
4309 return false;
4310 }
4311}
4312
4313// movrels* opcodes should only allow VGPRS as src0.
4314// This is specified in .td description for vop1/vop3,
4315// but sdwa is handled differently. See isSDWAOperand.
4316bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4317 const OperandVector &Operands) {
4318
4319 const unsigned Opc = Inst.getOpcode();
4320 const MCInstrDesc &Desc = MII.get(Opc);
4321
4322 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4323 return true;
4324
4325 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4326 assert(Src0Idx != -1);
4327
4328 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4329 if (Src0.isReg()) {
4330 auto Reg = mc2PseudoReg(Src0.getReg());
4331 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4332 if (!isSGPR(Reg, TRI))
4333 return true;
4334 }
4335
4336 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4337 return false;
4338}
4339
4340bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4341 const OperandVector &Operands) {
4342
4343 const unsigned Opc = Inst.getOpcode();
4344
4345 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4346 return true;
4347
4348 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4349 assert(Src0Idx != -1);
4350
4351 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4352 if (!Src0.isReg())
4353 return true;
4354
4355 auto Reg = mc2PseudoReg(Src0.getReg());
4356 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4357 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4358 Error(getOperandLoc(Operands, Src0Idx),
4359 "source operand must be either a VGPR or an inline constant");
4360 return false;
4361 }
4362
4363 return true;
4364}
4365
4366bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4367 const OperandVector &Operands) {
4368 unsigned Opcode = Inst.getOpcode();
4369 const MCInstrDesc &Desc = MII.get(Opcode);
4370
4371 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4372 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4373 return true;
4374
4375 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4376 if (Src2Idx == -1)
4377 return true;
4378
4379 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4380 Error(getOperandLoc(Operands, Src2Idx),
4381 "inline constants are not allowed for this operand");
4382 return false;
4383 }
4384
4385 return true;
4386}
4387
4388bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4389 const OperandVector &Operands) {
4390 const unsigned Opc = Inst.getOpcode();
4391 const MCInstrDesc &Desc = MII.get(Opc);
4392
4393 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4394 return true;
4395
4396 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4397 if (BlgpIdx != -1) {
4398 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4399 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4400
4401 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4402 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4403
4404 // Validate the correct register size was used for the floating point
4405 // format operands
4406
4407 bool Success = true;
4408 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4409 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4410 Error(getOperandLoc(Operands, Src0Idx),
4411 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4412 Success = false;
4413 }
4414
4415 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4416 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4417 Error(getOperandLoc(Operands, Src1Idx),
4418 "wrong register tuple size for blgp value " + Twine(BLGP));
4419 Success = false;
4420 }
4421
4422 return Success;
4423 }
4424 }
4425
4426 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4427 if (Src2Idx == -1)
4428 return true;
4429
4430 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4431 if (!Src2.isReg())
4432 return true;
4433
4434 MCRegister Src2Reg = Src2.getReg();
4435 MCRegister DstReg = Inst.getOperand(0).getReg();
4436 if (Src2Reg == DstReg)
4437 return true;
4438
4439 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4440 if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
4441 .getSizeInBits() <= 128)
4442 return true;
4443
4444 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4445 Error(getOperandLoc(Operands, Src2Idx),
4446 "source 2 operand must not partially overlap with dst");
4447 return false;
4448 }
4449
4450 return true;
4451}
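In other words, for the non-F8F6F4 path above: when the accumulator is wider than 128 bits, src2 must either be exactly the dst tuple or not overlap it at all; a partial overlap, such as an accumulator starting in the middle of the dst range, is rejected, while src2 == dst (the common in-place accumulate) is fine.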
4452
4453bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4454 switch (Inst.getOpcode()) {
4455 default:
4456 return true;
4457 case V_DIV_SCALE_F32_gfx6_gfx7:
4458 case V_DIV_SCALE_F32_vi:
4459 case V_DIV_SCALE_F32_gfx10:
4460 case V_DIV_SCALE_F64_gfx6_gfx7:
4461 case V_DIV_SCALE_F64_vi:
4462 case V_DIV_SCALE_F64_gfx10:
4463 break;
4464 }
4465
4466 // TODO: Check that src0 = src1 or src2.
4467
4468 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4469 AMDGPU::OpName::src2_modifiers,
4470 AMDGPU::OpName::src2_modifiers}) {
4471 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4472 .getImm() &
4473 SISrcMods::ABS) {
4474 return false;
4475 }
4476 }
4477
4478 return true;
4479}
4480
4481bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4482
4483 const unsigned Opc = Inst.getOpcode();
4484 const MCInstrDesc &Desc = MII.get(Opc);
4485
4486 if ((Desc.TSFlags & MIMGFlags) == 0)
4487 return true;
4488
4489 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4490 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4491 if (isCI() || isSI())
4492 return false;
4493 }
4494
4495 return true;
4496}
4497
4498bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4499 const unsigned Opc = Inst.getOpcode();
4500 const MCInstrDesc &Desc = MII.get(Opc);
4501
4502 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4503 return true;
4504
4505 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4506
4507 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4508}
4509
4510static bool IsRevOpcode(const unsigned Opcode)
4511{
4512 switch (Opcode) {
4513 case AMDGPU::V_SUBREV_F32_e32:
4514 case AMDGPU::V_SUBREV_F32_e64:
4515 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4516 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4517 case AMDGPU::V_SUBREV_F32_e32_vi:
4518 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4519 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4520 case AMDGPU::V_SUBREV_F32_e64_vi:
4521
4522 case AMDGPU::V_SUBREV_CO_U32_e32:
4523 case AMDGPU::V_SUBREV_CO_U32_e64:
4524 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4525 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4526
4527 case AMDGPU::V_SUBBREV_U32_e32:
4528 case AMDGPU::V_SUBBREV_U32_e64:
4529 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4530 case AMDGPU::V_SUBBREV_U32_e32_vi:
4531 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4532 case AMDGPU::V_SUBBREV_U32_e64_vi:
4533
4534 case AMDGPU::V_SUBREV_U32_e32:
4535 case AMDGPU::V_SUBREV_U32_e64:
4536 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4537 case AMDGPU::V_SUBREV_U32_e32_vi:
4538 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4539 case AMDGPU::V_SUBREV_U32_e64_vi:
4540
4541 case AMDGPU::V_SUBREV_F16_e32:
4542 case AMDGPU::V_SUBREV_F16_e64:
4543 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4544 case AMDGPU::V_SUBREV_F16_e32_vi:
4545 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4546 case AMDGPU::V_SUBREV_F16_e64_vi:
4547
4548 case AMDGPU::V_SUBREV_U16_e32:
4549 case AMDGPU::V_SUBREV_U16_e64:
4550 case AMDGPU::V_SUBREV_U16_e32_vi:
4551 case AMDGPU::V_SUBREV_U16_e64_vi:
4552
4553 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4554 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4555 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4556
4557 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4558 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4559
4560 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4561 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4562
4563 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4564 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4565
4566 case AMDGPU::V_LSHRREV_B32_e32:
4567 case AMDGPU::V_LSHRREV_B32_e64:
4568 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4569 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4570 case AMDGPU::V_LSHRREV_B32_e32_vi:
4571 case AMDGPU::V_LSHRREV_B32_e64_vi:
4572 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4573 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4574
4575 case AMDGPU::V_ASHRREV_I32_e32:
4576 case AMDGPU::V_ASHRREV_I32_e64:
4577 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4578 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4579 case AMDGPU::V_ASHRREV_I32_e32_vi:
4580 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4581 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4582 case AMDGPU::V_ASHRREV_I32_e64_vi:
4583
4584 case AMDGPU::V_LSHLREV_B32_e32:
4585 case AMDGPU::V_LSHLREV_B32_e64:
4586 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4587 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4588 case AMDGPU::V_LSHLREV_B32_e32_vi:
4589 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4590 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4591 case AMDGPU::V_LSHLREV_B32_e64_vi:
4592
4593 case AMDGPU::V_LSHLREV_B16_e32:
4594 case AMDGPU::V_LSHLREV_B16_e64:
4595 case AMDGPU::V_LSHLREV_B16_e32_vi:
4596 case AMDGPU::V_LSHLREV_B16_e64_vi:
4597 case AMDGPU::V_LSHLREV_B16_gfx10:
4598
4599 case AMDGPU::V_LSHRREV_B16_e32:
4600 case AMDGPU::V_LSHRREV_B16_e64:
4601 case AMDGPU::V_LSHRREV_B16_e32_vi:
4602 case AMDGPU::V_LSHRREV_B16_e64_vi:
4603 case AMDGPU::V_LSHRREV_B16_gfx10:
4604
4605 case AMDGPU::V_ASHRREV_I16_e32:
4606 case AMDGPU::V_ASHRREV_I16_e64:
4607 case AMDGPU::V_ASHRREV_I16_e32_vi:
4608 case AMDGPU::V_ASHRREV_I16_e64_vi:
4609 case AMDGPU::V_ASHRREV_I16_gfx10:
4610
4611 case AMDGPU::V_LSHLREV_B64_e64:
4612 case AMDGPU::V_LSHLREV_B64_gfx10:
4613 case AMDGPU::V_LSHLREV_B64_vi:
4614
4615 case AMDGPU::V_LSHRREV_B64_e64:
4616 case AMDGPU::V_LSHRREV_B64_gfx10:
4617 case AMDGPU::V_LSHRREV_B64_vi:
4618
4619 case AMDGPU::V_ASHRREV_I64_e64:
4620 case AMDGPU::V_ASHRREV_I64_gfx10:
4621 case AMDGPU::V_ASHRREV_I64_vi:
4622
4623 case AMDGPU::V_PK_LSHLREV_B16:
4624 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4625 case AMDGPU::V_PK_LSHLREV_B16_vi:
4626
4627 case AMDGPU::V_PK_LSHRREV_B16:
4628 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4629 case AMDGPU::V_PK_LSHRREV_B16_vi:
4630 case AMDGPU::V_PK_ASHRREV_I16:
4631 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4632 case AMDGPU::V_PK_ASHRREV_I16_vi:
4633 return true;
4634 default:
4635 return false;
4636 }
4637}
4638
4639bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4640 const OperandVector &Operands) {
4641 using namespace SIInstrFlags;
4642 const unsigned Opcode = Inst.getOpcode();
4643 const MCInstrDesc &Desc = MII.get(Opcode);
4644
4645 // lds_direct register is defined so that it can be used
4646 // with 9-bit operands only. Ignore encodings which do not accept these.
4647 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4648 if ((Desc.TSFlags & Enc) == 0)
4649 return true;
4650
4651 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4652 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4653 if (SrcIdx == -1)
4654 break;
4655 const auto &Src = Inst.getOperand(SrcIdx);
4656 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4657
4658 if (isGFX90A() || isGFX11Plus()) {
4659 Error(getOperandLoc(Operands, SrcIdx),
4660 "lds_direct is not supported on this GPU");
4661 return false;
4662 }
4663
4664 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4665 Error(getOperandLoc(Operands, SrcIdx),
4666 "lds_direct cannot be used with this instruction");
4667 return false;
4668 }
4669
4670 if (SrcName != OpName::src0) {
4671 Error(getOperandLoc(Operands, SrcIdx),
4672 "lds_direct may be used as src0 only");
4673 return false;
4674 }
4675 }
4676 }
4677
4678 return true;
4679}
4680
4681SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4682 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4683 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4684 if (Op.isFlatOffset())
4685 return Op.getStartLoc();
4686 }
4687 return getLoc();
4688}
4689
4690bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4691 const OperandVector &Operands) {
4692 auto Opcode = Inst.getOpcode();
4693 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4694 if (OpNum == -1)
4695 return true;
4696
4697 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4698 if ((TSFlags & SIInstrFlags::FLAT))
4699 return validateFlatOffset(Inst, Operands);
4700
4701 if ((TSFlags & SIInstrFlags::SMRD))
4702 return validateSMEMOffset(Inst, Operands);
4703
4704 const auto &Op = Inst.getOperand(OpNum);
4705 // GFX12+ buffer ops: InstOffset is signed 24-bit, but must not be negative.
4706 if (isGFX12Plus() &&
4707 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4708 const unsigned OffsetSize = 24;
4709 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4710 Error(getFlatOffsetLoc(Operands),
4711 Twine("expected a ") + Twine(OffsetSize - 1) +
4712 "-bit unsigned offset for buffer ops");
4713 return false;
4714 }
4715 } else {
4716 const unsigned OffsetSize = 16;
4717 if (!isUIntN(OffsetSize, Op.getImm())) {
4718 Error(getFlatOffsetLoc(Operands),
4719 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4720 return false;
4721 }
4722 }
4723 return true;
4724}
4725
4726bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4727 const OperandVector &Operands) {
4728 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4729 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4730 return true;
4731
4732 auto Opcode = Inst.getOpcode();
4733 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4734 assert(OpNum != -1);
4735
4736 const auto &Op = Inst.getOperand(OpNum);
4737 if (!hasFlatOffsets() && Op.getImm() != 0) {
4738 Error(getFlatOffsetLoc(Operands),
4739 "flat offset modifier is not supported on this GPU");
4740 return false;
4741 }
4742
4743 // For pre-GFX12 FLAT instructions the offset must be non-negative;
4744 // MSB is ignored and forced to zero.
4745 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4746 bool AllowNegative =
4747 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4748 isGFX12Plus();
4749 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4750 Error(getFlatOffsetLoc(Operands),
4751 Twine("expected a ") +
4752 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4753 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4754 return false;
4755 }
4756
4757 return true;
4758}
4759
4760SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4761 // Start with second operand because SMEM Offset cannot be dst or src0.
4762 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4763 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4764 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4765 return Op.getStartLoc();
4766 }
4767 return getLoc();
4768}
4769
4770bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4771 const OperandVector &Operands) {
4772 if (isCI() || isSI())
4773 return true;
4774
4775 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4776 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4777 return true;
4778
4779 auto Opcode = Inst.getOpcode();
4780 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4781 if (OpNum == -1)
4782 return true;
4783
4784 const auto &Op = Inst.getOperand(OpNum);
4785 if (!Op.isImm())
4786 return true;
4787
4788 uint64_t Offset = Op.getImm();
4789 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4790 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4791 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4792 return true;
4793
4794 Error(getSMEMOffsetLoc(Operands),
4795 isGFX12Plus() && IsBuffer
4796 ? "expected a 23-bit unsigned offset for buffer ops"
4797 : isGFX12Plus() ? "expected a 24-bit signed offset"
4798 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4799 : "expected a 21-bit signed offset");
4800
4801 return false;
4802}
4803
4804bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4805 const OperandVector &Operands) {
4806 unsigned Opcode = Inst.getOpcode();
4807 const MCInstrDesc &Desc = MII.get(Opcode);
4808 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4809 return true;
4810
4811 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4812 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4813
4814 const int OpIndices[] = { Src0Idx, Src1Idx };
4815
4816 unsigned NumExprs = 0;
4817 unsigned NumLiterals = 0;
4818 int64_t LiteralValue;
4819
4820 for (int OpIdx : OpIndices) {
4821 if (OpIdx == -1) break;
4822
4823 const MCOperand &MO = Inst.getOperand(OpIdx);
4824 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4825 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4826 bool IsLit = false;
4827 std::optional<int64_t> Imm;
4828 if (MO.isImm()) {
4829 Imm = MO.getImm();
4830 } else if (MO.isExpr()) {
4831 if (isLitExpr(MO.getExpr())) {
4832 IsLit = true;
4833 Imm = getLitValue(MO.getExpr());
4834 }
4835 } else {
4836 continue;
4837 }
4838
4839 if (!Imm.has_value()) {
4840 ++NumExprs;
4841 } else if (!isInlineConstant(Inst, OpIdx)) {
4842 auto OpType = static_cast<AMDGPU::OperandType>(
4843 Desc.operands()[OpIdx].OperandType);
4844 int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
4845 if (NumLiterals == 0 || LiteralValue != Value) {
4846 LiteralValue = Value;
4847 ++NumLiterals;
4848 }
4849 }
4850 }
4851 }
4852
4853 if (NumLiterals + NumExprs <= 1)
4854 return true;
4855
4856 Error(getOperandLoc(Operands, Src1Idx),
4857 "only one unique literal operand is allowed");
4858 return false;
4859}
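For example, an SOP2 such as s_and_b32 s0, 0x12345678, 0x12345678 is accepted because both sources encode the same 32-bit literal value, whereas two different non-inline literals, or a literal combined with an unresolved expression, trip the "only one unique literal operand is allowed" diagnostic above.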
4860
4861bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4862 const unsigned Opc = Inst.getOpcode();
4863 if (isPermlane16(Opc)) {
4864 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4865 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4866
4867 if (OpSel & ~3)
4868 return false;
4869 }
4870
4871 uint64_t TSFlags = MII.get(Opc).TSFlags;
4872
4873 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4874 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4875 if (OpSelIdx != -1) {
4876 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4877 return false;
4878 }
4879 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4880 if (OpSelHiIdx != -1) {
4881 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4882 return false;
4883 }
4884 }
4885
4886 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4887 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4888 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4889 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4890 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4891 if (OpSel & 3)
4892 return false;
4893 }
4894
4895 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4896 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4897 // the first SGPR and use it for both the low and high operations.
4898 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4899 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4900 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4901 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4902 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4903
4904 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4905 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4906 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4907 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4908
4909 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4910
4911 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
4912 unsigned Mask = 1U << Index;
4913 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
4914 };
4915
4916 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
4917 !VerifyOneSGPR(/*Index=*/0))
4918 return false;
4919 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
4920 !VerifyOneSGPR(/*Index=*/1))
4921 return false;
4922
4923 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4924 if (Src2Idx != -1) {
4925 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4926 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
4927 !VerifyOneSGPR(/*Index=*/2))
4928 return false;
4929 }
4930 }
4931
4932 return true;
4933}
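Concretely, on gfx12+ a packed-FP32 instruction such as v_pk_add_f32 with an SGPR pair as a source only ever reads the first SGPR of that pair, so any op_sel or op_sel_hi bit selecting the high half of that SGPR source fails the check above; VGPR sources are unaffected.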
4934
4935bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4936 if (!hasTrue16Insts())
4937 return true;
4938 const MCRegisterInfo *MRI = getMRI();
4939 const unsigned Opc = Inst.getOpcode();
4940 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4941 if (OpSelIdx == -1)
4942 return true;
4943 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
4944 // If the value is 0 we could have a default OpSel Operand, so conservatively
4945 // allow it.
4946 if (OpSelOpValue == 0)
4947 return true;
4948 unsigned OpCount = 0;
4949 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4950 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4951 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
4952 if (OpIdx == -1)
4953 continue;
4954 const MCOperand &Op = Inst.getOperand(OpIdx);
4955 if (Op.isReg() &&
4956 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
4957 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
4958 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4959 if (OpSelOpIsHi != VGPRSuffixIsHi)
4960 return false;
4961 }
4962 ++OpCount;
4963 }
4964
4965 return true;
4966}
4967
4968bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
4969 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4970
4971 const unsigned Opc = Inst.getOpcode();
4972 uint64_t TSFlags = MII.get(Opc).TSFlags;
4973
4974 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4975 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4976 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4977 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4978 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4979 !(TSFlags & SIInstrFlags::IsSWMMAC))
4980 return true;
4981
4982 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4983 if (NegIdx == -1)
4984 return true;
4985
4986 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4987
4988 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
4989 // allowed on some src operands and not on others.
4990 // It is convenient that such instructions don't have src_modifiers operand
4991 // for src operands that don't allow neg because they also don't allow opsel.
4992
4993 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4994 AMDGPU::OpName::src1_modifiers,
4995 AMDGPU::OpName::src2_modifiers};
4996
4997 for (unsigned i = 0; i < 3; ++i) {
4998 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4999 if (Neg & (1 << i))
5000 return false;
5001 }
5002 }
5003
5004 return true;
5005}
5006
5007bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5008 const OperandVector &Operands) {
5009 const unsigned Opc = Inst.getOpcode();
5010 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5011 if (DppCtrlIdx >= 0) {
5012 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5013
5014 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5015 AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
5016 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5017 // only on GFX12.
5018 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5019 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5020 : "DP ALU dpp only supports row_newbcast");
5021 return false;
5022 }
5023 }
5024
5025 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5026 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5027
5028 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5029 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5030 if (Src1Idx >= 0) {
5031 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5032 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5033 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5034 Error(getOperandLoc(Operands, Src1Idx),
5035 "invalid operand for instruction");
5036 return false;
5037 }
5038 if (Src1.isImm()) {
5039 Error(getInstLoc(Operands),
5040 "src1 immediate operand invalid for instruction");
5041 return false;
5042 }
5043 }
5044 }
5045
5046 return true;
5047}
5048
5049// Check if VCC register matches wavefront size
5050bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5051 return (Reg == AMDGPU::VCC && isWave64()) ||
5052 (Reg == AMDGPU::VCC_LO && isWave32());
5053}
5054
5055// One unique literal can be used. VOP3 literal is only allowed in GFX10+
5056bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5057 const OperandVector &Operands) {
5058 unsigned Opcode = Inst.getOpcode();
5059 const MCInstrDesc &Desc = MII.get(Opcode);
5060 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5061 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5062 !HasMandatoryLiteral && !isVOPD(Opcode))
5063 return true;
5064
5065 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5066
5067 std::optional<unsigned> LiteralOpIdx;
5068 std::optional<uint64_t> LiteralValue;
5069
5070 for (int OpIdx : OpIndices) {
5071 if (OpIdx == -1)
5072 continue;
5073
5074 const MCOperand &MO = Inst.getOperand(OpIdx);
5075 if (!MO.isImm() && !MO.isExpr())
5076 continue;
5077 if (!isSISrcOperand(Desc, OpIdx))
5078 continue;
5079
5080 std::optional<int64_t> Imm;
5081 if (MO.isImm())
5082 Imm = MO.getImm();
5083 else if (MO.isExpr() && isLitExpr(MO.getExpr()))
5084 Imm = getLitValue(MO.getExpr());
5085
5086 bool IsAnotherLiteral = false;
5087 if (!Imm.has_value()) {
5088 // Literal value not known, so we conservatively assume it's different.
5089 IsAnotherLiteral = true;
5090 } else if (!isInlineConstant(Inst, OpIdx)) {
5091 uint64_t Value = *Imm;
5092 bool IsForcedFP64 =
5093 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5094 (Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_REG_IMM_FP64 &&
5095 HasMandatoryLiteral);
5096 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5097 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5098 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
5099
5100 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5101 !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
5102 Error(getOperandLoc(Operands, OpIdx),
5103 "invalid operand for instruction");
5104 return false;
5105 }
5106
5107 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5108 Value = Hi_32(Value);
5109
5110 IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
5111 LiteralValue = Value;
5112 }
5113
5114 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5115 !getFeatureBits()[FeatureVOP3Literal]) {
5116 Error(getOperandLoc(Operands, OpIdx),
5117 "literal operands are not supported");
5118 return false;
5119 }
5120
5121 if (LiteralOpIdx && IsAnotherLiteral) {
5122 Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
5123 getOperandLoc(Operands, *LiteralOpIdx)),
5124 "only one unique literal operand is allowed");
5125 return false;
5126 }
5127
5128 if (IsAnotherLiteral)
5129 LiteralOpIdx = OpIdx;
5130 }
5131
5132 return true;
5133}
5134
5135// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5136static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5137 const MCRegisterInfo *MRI) {
5138 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5139 if (OpIdx < 0)
5140 return -1;
5141
5142 const MCOperand &Op = Inst.getOperand(OpIdx);
5143 if (!Op.isReg())
5144 return -1;
5145
5146 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5147 auto Reg = Sub ? Sub : Op.getReg();
5148 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5149 return AGPR32.contains(Reg) ? 1 : 0;
5150}
5151
5152bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5153 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5154 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5155 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5156 SIInstrFlags::DS)) == 0)
5157 return true;
5158
5159 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5160 ? AMDGPU::OpName::data0
5161 : AMDGPU::OpName::vdata;
5162
5163 const MCRegisterInfo *MRI = getMRI();
5164 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5165 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5166
5167 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5168 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5169 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5170 return false;
5171 }
5172
5173 auto FB = getFeatureBits();
5174 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5175 if (DataAreg < 0 || DstAreg < 0)
5176 return true;
5177 return DstAreg == DataAreg;
5178 }
5179
5180 return DstAreg < 1 && DataAreg < 1;
5181}
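Summarizing the return paths above: DS instructions require data0 and data1 to agree on AGPR vs. VGPR; with gfx90a instructions available the destination and data operands must also agree; and on targets without them, AGPR destination or data operands are rejected for these memory operations altogether.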
5182
5183bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5184 auto FB = getFeatureBits();
5185 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5186 return true;
5187
5188 unsigned Opc = Inst.getOpcode();
5189 const MCRegisterInfo *MRI = getMRI();
5190 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950 that allows an
5191 // unaligned VGPR. All others only allow even-aligned VGPRs.
5192 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5193 return true;
5194
5195 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5196 switch (Opc) {
5197 default:
5198 break;
5199 case AMDGPU::DS_LOAD_TR6_B96:
5200 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5201 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250 that allows an
5202 // unaligned VGPR. All others only allow even-aligned VGPRs.
5203 return true;
5204 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5205 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5206 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250 that allows
5207 // an unaligned VGPR for vdst, but other operands still only allow
5208 // even-aligned VGPRs.
5209 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5210 if (VAddrIdx != -1) {
5211 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5212 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5213 if ((Sub - AMDGPU::VGPR0) & 1)
5214 return false;
5215 }
5216 return true;
5217 }
5218 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5219 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5220 return true;
5221 }
5222 }
5223
5224 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5225 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5226 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5227 const MCOperand &Op = Inst.getOperand(I);
5228 if (!Op.isReg())
5229 continue;
5230
5231 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5232 if (!Sub)
5233 continue;
5234
5235 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5236 return false;
5237 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5238 return false;
5239 }
5240
5241 return true;
5242}
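In practice, on subtargets with the aligned-VGPR requirement any 64-bit or wider register tuple must start at an even VGPR or AGPR, so for example v[2:3] is accepted while v[1:2] is rejected; the TR6_B96 loads handled explicitly above are the documented exceptions for their 96-bit operands.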
5243
5244SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5245 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5246 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5247 if (Op.isBLGP())
5248 return Op.getStartLoc();
5249 }
5250 return SMLoc();
5251}
5252
5253bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5254 const OperandVector &Operands) {
5255 unsigned Opc = Inst.getOpcode();
5256 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5257 if (BlgpIdx == -1)
5258 return true;
5259 SMLoc BLGPLoc = getBLGPLoc(Operands);
5260 if (!BLGPLoc.isValid())
5261 return true;
5262 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5263 auto FB = getFeatureBits();
5264 bool UsesNeg = false;
5265 if (FB[AMDGPU::FeatureGFX940Insts]) {
5266 switch (Opc) {
5267 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5268 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5269 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5270 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5271 UsesNeg = true;
5272 }
5273 }
5274
5275 if (IsNeg == UsesNeg)
5276 return true;
5277
5278 Error(BLGPLoc,
5279 UsesNeg ? "invalid modifier: blgp is not supported"
5280 : "invalid modifier: neg is not supported");
5281
5282 return false;
5283}
5284
5285bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5286 const OperandVector &Operands) {
5287 if (!isGFX11Plus())
5288 return true;
5289
5290 unsigned Opc = Inst.getOpcode();
5291 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5292 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5293 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5294 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5295 return true;
5296
5297 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5298 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5299 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5300 if (Reg == AMDGPU::SGPR_NULL)
5301 return true;
5302
5303 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5304 return false;
5305}
5306
5307bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5308 const OperandVector &Operands) {
5309 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5310 if ((TSFlags & SIInstrFlags::DS) == 0)
5311 return true;
5312 if (TSFlags & SIInstrFlags::GWS)
5313 return validateGWS(Inst, Operands);
5314 // Only validate GDS for non-GWS instructions.
5315 if (hasGDS())
5316 return true;
5317 int GDSIdx =
5318 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5319 if (GDSIdx < 0)
5320 return true;
5321 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5322 if (GDS) {
5323 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5324 Error(S, "gds modifier is not supported on this GPU");
5325 return false;
5326 }
5327 return true;
5328}
5329
5330// gfx90a has an undocumented limitation:
5331// DS_GWS opcodes must use even aligned registers.
5332bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5333 const OperandVector &Operands) {
5334 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5335 return true;
5336
5337 int Opc = Inst.getOpcode();
5338 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5339 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5340 return true;
5341
5342 const MCRegisterInfo *MRI = getMRI();
5343 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5344 int Data0Pos =
5345 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5346 assert(Data0Pos != -1);
5347 auto Reg = Inst.getOperand(Data0Pos).getReg();
5348 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5349 if (RegIdx & 1) {
5350 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5351 return false;
5352 }
5353
5354 return true;
5355}
5356
5357bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5358 const OperandVector &Operands,
5359 SMLoc IDLoc) {
5360 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5361 AMDGPU::OpName::cpol);
5362 if (CPolPos == -1)
5363 return true;
5364
5365 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5366
5367 if (!isGFX1250()) {
5368 if (CPol & CPol::SCAL) {
5369 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5370 StringRef CStr(S.getPointer());
5371 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5372 Error(S, "scale_offset is not supported on this GPU");
5373 }
5374 if (CPol & CPol::NV) {
5375 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5376 StringRef CStr(S.getPointer());
5377 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5378 Error(S, "nv is not supported on this GPU");
5379 }
5380 }
5381
5382 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5383 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5384 StringRef CStr(S.getPointer());
5385 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5386 Error(S, "scale_offset is not supported for this instruction");
5387 }
5388
5389 if (isGFX12Plus())
5390 return validateTHAndScopeBits(Inst, Operands, CPol);
5391
5392 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5393 if (TSFlags & SIInstrFlags::SMRD) {
5394 if (CPol && (isSI() || isCI())) {
5395 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5396 Error(S, "cache policy is not supported for SMRD instructions");
5397 return false;
5398 }
5399 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5400 Error(IDLoc, "invalid cache policy for SMEM instruction");
5401 return false;
5402 }
5403 }
5404
5405 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5406 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5407 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5408 SIInstrFlags::FLAT;
5409 if (!(TSFlags & AllowSCCModifier)) {
5410 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5411 StringRef CStr(S.getPointer());
5412 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5413 Error(S,
5414 "scc modifier is not supported for this instruction on this GPU");
5415 return false;
5416 }
5417 }
5418
5419 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
5420 return true;
5421
5422 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5423 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5424 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5425 : "instruction must use glc");
5426 return false;
5427 }
5428 } else {
5429 if (CPol & CPol::GLC) {
5430 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5431 StringRef CStr(S.getPointer());
5432 S = SMLoc::getFromPointer(
5433 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5434 Error(S, isGFX940() ? "instruction must not use sc0"
5435 : "instruction must not use glc");
5436 return false;
5437 }
5438 }
5439
5440 return true;
5441}
5442
5443bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5444 const OperandVector &Operands,
5445 const unsigned CPol) {
5446 const unsigned TH = CPol & AMDGPU::CPol::TH;
5447 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5448
5449 const unsigned Opcode = Inst.getOpcode();
5450 const MCInstrDesc &TID = MII.get(Opcode);
5451
5452 auto PrintError = [&](StringRef Msg) {
5453 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5454 Error(S, Msg);
5455 return false;
5456 };
5457
5458 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5461 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5462
5463 if (TH == 0)
5464 return true;
5465
5466 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5467 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5468 (TH == AMDGPU::CPol::TH_NT_HT)))
5469 return PrintError("invalid th value for SMEM instruction");
5470
5471 if (TH == AMDGPU::CPol::TH_BYPASS) {
5472 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5474 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5476 return PrintError("scope and th combination is not valid");
5477 }
5478
5479 unsigned THType = AMDGPU::getTemporalHintType(TID);
5480 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5481 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5482 return PrintError("invalid th value for atomic instructions");
5483 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5484 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5485 return PrintError("invalid th value for store instructions");
5486 } else {
5487 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5488 return PrintError("invalid th value for load instructions");
5489 }
5490
5491 return true;
5492}
5493
5494bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5495 const OperandVector &Operands) {
5496 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5497 if (Desc.mayStore() &&
5498 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5499 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5500 if (Loc != getInstLoc(Operands)) {
5501 Error(Loc, "TFE modifier has no meaning for store instructions");
5502 return false;
5503 }
5504 }
5505
5506 return true;
5507}
5508
5509bool AMDGPUAsmParser::validateSetVgprMSB(const MCInst &Inst,
5510 const OperandVector &Operands) {
5511 if (Inst.getOpcode() != AMDGPU::S_SET_VGPR_MSB_gfx12)
5512 return true;
5513
5514 int Simm16Pos =
5515 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::simm16);
5516 if ((unsigned)Inst.getOperand(Simm16Pos).getImm() > 255) {
5517 SMLoc Loc = Operands[1]->getStartLoc();
5518 Error(Loc, "s_set_vgpr_msb accepts values in range [0..255]");
5519 return false;
5520 }
5521
5522 return true;
5523}
5524
5525bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5526 const OperandVector &Operands) {
5527 unsigned Opc = Inst.getOpcode();
5528 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5529 const MCInstrDesc &Desc = MII.get(Opc);
5530
5531 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
5532 int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
5533 if (FmtIdx == -1)
5534 return true;
5535 unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
5536 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5537 unsigned RegSize =
5538 TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
5539 .getSizeInBits();
5540
5542 return true;
5543
5544 static const char *FmtNames[] = {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
5545 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
5546 "MATRIX_FMT_FP4"};
5547
5548 Error(getOperandLoc(Operands, SrcIdx),
5549 "wrong register tuple size for " + Twine(FmtNames[Fmt]));
5550 return false;
5551 };
5552
5553 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5554 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5555}
5556
5557bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
5558 const OperandVector &Operands) {
5559 if (!validateLdsDirect(Inst, Operands))
5560 return false;
5561 if (!validateTrue16OpSel(Inst)) {
5562 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5563 "op_sel operand conflicts with 16-bit operand suffix");
5564 return false;
5565 }
5566 if (!validateSOPLiteral(Inst, Operands))
5567 return false;
5568 if (!validateVOPLiteral(Inst, Operands)) {
5569 return false;
5570 }
5571 if (!validateConstantBusLimitations(Inst, Operands)) {
5572 return false;
5573 }
5574 if (!validateVOPD(Inst, Operands)) {
5575 return false;
5576 }
5577 if (!validateIntClampSupported(Inst)) {
5578 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5579 "integer clamping is not supported on this GPU");
5580 return false;
5581 }
5582 if (!validateOpSel(Inst)) {
5583 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5584 "invalid op_sel operand");
5585 return false;
5586 }
5587 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5588 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5589 "invalid neg_lo operand");
5590 return false;
5591 }
5592 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5593 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5594 "invalid neg_hi operand");
5595 return false;
5596 }
5597 if (!validateDPP(Inst, Operands)) {
5598 return false;
5599 }
5600 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5601 if (!validateMIMGD16(Inst)) {
5602 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5603 "d16 modifier is not supported on this GPU");
5604 return false;
5605 }
5606 if (!validateMIMGDim(Inst, Operands)) {
5607 Error(IDLoc, "missing dim operand");
5608 return false;
5609 }
5610 if (!validateTensorR128(Inst)) {
5611 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5612 "instruction must set modifier r128=0");
5613 return false;
5614 }
5615 if (!validateMIMGMSAA(Inst)) {
5616 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5617 "invalid dim; must be MSAA type");
5618 return false;
5619 }
5620 if (!validateMIMGDataSize(Inst, IDLoc)) {
5621 return false;
5622 }
5623 if (!validateMIMGAddrSize(Inst, IDLoc))
5624 return false;
5625 if (!validateMIMGAtomicDMask(Inst)) {
5626 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5627 "invalid atomic image dmask");
5628 return false;
5629 }
5630 if (!validateMIMGGatherDMask(Inst)) {
5631 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5632 "invalid image_gather dmask: only one bit must be set");
5633 return false;
5634 }
5635 if (!validateMovrels(Inst, Operands)) {
5636 return false;
5637 }
5638 if (!validateOffset(Inst, Operands)) {
5639 return false;
5640 }
5641 if (!validateMAIAccWrite(Inst, Operands)) {
5642 return false;
5643 }
5644 if (!validateMAISrc2(Inst, Operands)) {
5645 return false;
5646 }
5647 if (!validateMFMA(Inst, Operands)) {
5648 return false;
5649 }
5650 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5651 return false;
5652 }
5653
5654 if (!validateAGPRLdSt(Inst)) {
5655 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5656 ? "invalid register class: data and dst should be all VGPR or AGPR"
5657 : "invalid register class: agpr loads and stores not supported on this GPU"
5658 );
5659 return false;
5660 }
5661 if (!validateVGPRAlign(Inst)) {
5662 Error(IDLoc,
5663 "invalid register class: vgpr tuples must be 64 bit aligned");
5664 return false;
5665 }
5666 if (!validateDS(Inst, Operands)) {
5667 return false;
5668 }
5669
5670 if (!validateBLGP(Inst, Operands)) {
5671 return false;
5672 }
5673
5674 if (!validateDivScale(Inst)) {
5675 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5676 return false;
5677 }
5678 if (!validateWaitCnt(Inst, Operands)) {
5679 return false;
5680 }
5681 if (!validateTFE(Inst, Operands)) {
5682 return false;
5683 }
5684 if (!validateSetVgprMSB(Inst, Operands)) {
5685 return false;
5686 }
5687 if (!validateWMMA(Inst, Operands)) {
5688 return false;
5689 }
5690
5691 return true;
5692}
5693
5694 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5695 const FeatureBitset &FBS,
5696 unsigned VariantID = 0);
5697
5698static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5699 const FeatureBitset &AvailableFeatures,
5700 unsigned VariantID);
5701
5702bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5703 const FeatureBitset &FBS) {
5704 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5705}
5706
5707bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5708 const FeatureBitset &FBS,
5709 ArrayRef<unsigned> Variants) {
5710 for (auto Variant : Variants) {
5711 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5712 return true;
5713 }
5714
5715 return false;
5716}
5717
5718bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5719 SMLoc IDLoc) {
5720 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5721
5722 // Check if requested instruction variant is supported.
5723 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5724 return false;
5725
5726 // This instruction is not supported.
5727 // Clear any other pending errors because they are no longer relevant.
5728 getParser().clearPendingErrors();
5729
5730 // Requested instruction variant is not supported.
5731 // Check if any other variants are supported.
5732 StringRef VariantName = getMatchedVariantName();
5733 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5734 return Error(IDLoc,
5735 Twine(VariantName,
5736 " variant of this instruction is not supported"));
5737 }
5738
5739 // Check if this instruction may be used with a different wavesize.
5740 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5741 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5742 // FIXME: Use getAvailableFeatures, and do not manually recompute
5743 FeatureBitset FeaturesWS32 = getFeatureBits();
5744 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5745 .flip(AMDGPU::FeatureWavefrontSize32);
5746 FeatureBitset AvailableFeaturesWS32 =
5747 ComputeAvailableFeatures(FeaturesWS32);
5748
5749 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5750 return Error(IDLoc, "instruction requires wavesize=32");
5751 }
5752
5753 // Finally check if this instruction is supported on any other GPU.
5754 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5755 return Error(IDLoc, "instruction not supported on this GPU");
5756 }
5757
5758 // Instruction not supported on any GPU. Probably a typo.
5759 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5760 return Error(IDLoc, "invalid instruction" + Suggestion);
5761}
5762
5763 static bool isInvalidVOPDY(const OperandVector &Operands,
5764 uint64_t InvalidOprIdx) {
5765 assert(InvalidOprIdx < Operands.size());
5766 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5767 if (Op.isToken() && InvalidOprIdx > 1) {
5768 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5769 return PrevOp.isToken() && PrevOp.getToken() == "::";
5770 }
5771 return false;
5772}
5773
5774bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5775 OperandVector &Operands,
5776 MCStreamer &Out,
5777 uint64_t &ErrorInfo,
5778 bool MatchingInlineAsm) {
5779 MCInst Inst;
5780 Inst.setLoc(IDLoc);
5781 unsigned Result = Match_Success;
5782 for (auto Variant : getMatchedVariants()) {
5783 uint64_t EI;
5784 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5785 Variant);
5786 // We order match statuses from least to most specific and use the most
5787 // specific status as the result:
5788 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5789 if (R == Match_Success || R == Match_MissingFeature ||
5790 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5791 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5792 Result != Match_MissingFeature)) {
5793 Result = R;
5794 ErrorInfo = EI;
5795 }
5796 if (R == Match_Success)
5797 break;
5798 }
5799
5800 if (Result == Match_Success) {
5801 if (!validateInstruction(Inst, IDLoc, Operands)) {
5802 return true;
5803 }
5804 Out.emitInstruction(Inst, getSTI());
5805 return false;
5806 }
5807
5808 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5809 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5810 return true;
5811 }
5812
5813 switch (Result) {
5814 default: break;
5815 case Match_MissingFeature:
5816 // It has been verified that the specified instruction
5817 // mnemonic is valid. A match was found but it requires
5818 // features which are not supported on this GPU.
5819 return Error(IDLoc, "operands are not valid for this GPU or mode");
5820
5821 case Match_InvalidOperand: {
5822 SMLoc ErrorLoc = IDLoc;
5823 if (ErrorInfo != ~0ULL) {
5824 if (ErrorInfo >= Operands.size()) {
5825 return Error(IDLoc, "too few operands for instruction");
5826 }
5827 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5828 if (ErrorLoc == SMLoc())
5829 ErrorLoc = IDLoc;
5830
5831 if (isInvalidVOPDY(Operands, ErrorInfo))
5832 return Error(ErrorLoc, "invalid VOPDY instruction");
5833 }
5834 return Error(ErrorLoc, "invalid operand for instruction");
5835 }
5836
5837 case Match_MnemonicFail:
5838 llvm_unreachable("Invalid instructions should have been handled already");
5839 }
5840 llvm_unreachable("Implement any new match types added!");
5841}
5842
5843bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5844 int64_t Tmp = -1;
5845 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5846 return true;
5847 }
5848 if (getParser().parseAbsoluteExpression(Tmp)) {
5849 return true;
5850 }
5851 Ret = static_cast<uint32_t>(Tmp);
5852 return false;
5853}
5854
5855bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5856 if (!getSTI().getTargetTriple().isAMDGCN())
5857 return TokError("directive only supported for amdgcn architecture");
5858
5859 std::string TargetIDDirective;
5860 SMLoc TargetStart = getTok().getLoc();
5861 if (getParser().parseEscapedString(TargetIDDirective))
5862 return true;
5863
5864 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5865 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5866 return getParser().Error(TargetRange.Start,
5867 (Twine(".amdgcn_target directive's target id ") +
5868 Twine(TargetIDDirective) +
5869 Twine(" does not match the specified target id ") +
5870 Twine(getTargetStreamer().getTargetID()->toString())).str());
5871
5872 return false;
5873}
5874
5875bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5876 return Error(Range.Start, "value out of range", Range);
5877}
5878
5879bool AMDGPUAsmParser::calculateGPRBlocks(
5880 const FeatureBitset &Features, const MCExpr *VCCUsed,
5881 const MCExpr *FlatScrUsed, bool XNACKUsed,
5882 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5883 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5884 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5885 // TODO(scott.linder): These calculations are duplicated from
5886 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5887 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5888 MCContext &Ctx = getContext();
5889
5890 const MCExpr *NumSGPRs = NextFreeSGPR;
5891 int64_t EvaluatedSGPRs;
5892
5893 if (Version.Major >= 10)
5894 NumSGPRs = MCConstantExpr::create(0, Ctx);
5895 else {
5896 unsigned MaxAddressableNumSGPRs =
5897 IsaInfo::getAddressableNumSGPRs(&getSTI());
5898
5899 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5900 !Features.test(FeatureSGPRInitBug) &&
5901 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5902 return OutOfRangeError(SGPRRange);
5903
5904 const MCExpr *ExtraSGPRs =
5905 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5906 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5907
5908 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5909 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5910 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5911 return OutOfRangeError(SGPRRange);
5912
5913 if (Features.test(FeatureSGPRInitBug))
5914 NumSGPRs =
5915 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5916 }
5917
5918 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5919 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
5920 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5921 unsigned Granule) -> const MCExpr * {
5922 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5923 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5924 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5925 const MCExpr *AlignToGPR =
5926 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5927 const MCExpr *DivGPR =
5928 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5929 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5930 return SubGPR;
5931 };
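// Editorial note, not part of the upstream source: a worked example of the
// block-count formula above, with assumed numbers. For NumGPR = 37 and an
// encoding granule of 4: alignTo(max(1, 37), 4) = 40, 40 / 4 = 10, and
// 10 - 1 = 9, so the encoded block count is 9.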
5932
5933 VGPRBlocks = GetNumGPRBlocks(
5934 NextFreeVGPR,
5935 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5936 SGPRBlocks =
5937 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5938
5939 return false;
5940}
5941
5942bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5943 if (!getSTI().getTargetTriple().isAMDGCN())
5944 return TokError("directive only supported for amdgcn architecture");
5945
5946 if (!isHsaAbi(getSTI()))
5947 return TokError("directive only supported for amdhsa OS");
5948
5949 StringRef KernelName;
5950 if (getParser().parseIdentifier(KernelName))
5951 return true;
5952
5953 AMDGPU::MCKernelDescriptor KD =
5954 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5955 &getSTI(), getContext());
5956
5957 StringSet<> Seen;
5958
5959 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5960
5961 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5962 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5963
5964 SMRange VGPRRange;
5965 const MCExpr *NextFreeVGPR = ZeroExpr;
5966 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5967 const MCExpr *NamedBarCnt = ZeroExpr;
5968 uint64_t SharedVGPRCount = 0;
5969 uint64_t PreloadLength = 0;
5970 uint64_t PreloadOffset = 0;
5971 SMRange SGPRRange;
5972 const MCExpr *NextFreeSGPR = ZeroExpr;
5973
5974 // Count the number of user SGPRs implied from the enabled feature bits.
5975 unsigned ImpliedUserSGPRCount = 0;
5976
5977 // Track if the asm explicitly contains the directive for the user SGPR
5978 // count.
5979 std::optional<unsigned> ExplicitUserSGPRCount;
5980 const MCExpr *ReserveVCC = OneExpr;
5981 const MCExpr *ReserveFlatScr = OneExpr;
5982 std::optional<bool> EnableWavefrontSize32;
5983
5984 while (true) {
5985 while (trySkipToken(AsmToken::EndOfStatement));
5986
5987 StringRef ID;
5988 SMRange IDRange = getTok().getLocRange();
5989 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5990 return true;
5991
5992 if (ID == ".end_amdhsa_kernel")
5993 break;
5994
5995 if (!Seen.insert(ID).second)
5996 return TokError(".amdhsa_ directives cannot be repeated");
5997
5998 SMLoc ValStart = getLoc();
5999 const MCExpr *ExprVal;
6000 if (getParser().parseExpression(ExprVal))
6001 return true;
6002 SMLoc ValEnd = getLoc();
6003 SMRange ValRange = SMRange(ValStart, ValEnd);
6004
6005 int64_t IVal = 0;
6006 uint64_t Val = IVal;
6007 bool EvaluatableExpr;
6008 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6009 if (IVal < 0)
6010 return OutOfRangeError(ValRange);
6011 Val = IVal;
6012 }
6013
6014#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6015 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6016 return OutOfRangeError(RANGE); \
6017 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6018 getContext());
6019
6020// Some fields use the parsed value immediately which requires the expression to
6021// be solvable.
6022#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6023 if (!(RESOLVED)) \
6024 return Error(IDRange.Start, "directive should have resolvable expression", \
6025 IDRange);
6026
6027 if (ID == ".amdhsa_group_segment_fixed_size") {
6028 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
6029 CHAR_BIT>(Val))
6030 return OutOfRangeError(ValRange);
6031 KD.group_segment_fixed_size = ExprVal;
6032 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6033 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
6034 CHAR_BIT>(Val))
6035 return OutOfRangeError(ValRange);
6036 KD.private_segment_fixed_size = ExprVal;
6037 } else if (ID == ".amdhsa_kernarg_size") {
6038 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6039 return OutOfRangeError(ValRange);
6040 KD.kernarg_size = ExprVal;
6041 } else if (ID == ".amdhsa_user_sgpr_count") {
6042 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6043 ExplicitUserSGPRCount = Val;
6044 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6045 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6046 if (hasArchitectedFlatScratch())
6047 return Error(IDRange.Start,
6048 "directive is not supported with architected flat scratch",
6049 IDRange);
6050 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6051 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6052 ExprVal, ValRange);
6053 if (Val)
6054 ImpliedUserSGPRCount += 4;
6055 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6056 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6057 if (!hasKernargPreload())
6058 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6059
6060 if (Val > getMaxNumUserSGPRs())
6061 return OutOfRangeError(ValRange);
6062 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6063 ValRange);
6064 if (Val) {
6065 ImpliedUserSGPRCount += Val;
6066 PreloadLength = Val;
6067 }
6068 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6069 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6070 if (!hasKernargPreload())
6071 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6072
6073 if (Val >= 1024)
6074 return OutOfRangeError(ValRange);
6075 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6076 ValRange);
6077 if (Val)
6078 PreloadOffset = Val;
6079 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6080 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6081 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6082 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6083 ValRange);
6084 if (Val)
6085 ImpliedUserSGPRCount += 2;
6086 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6087 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6088 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6089 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6090 ValRange);
6091 if (Val)
6092 ImpliedUserSGPRCount += 2;
6093 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6094 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6095 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6096 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6097 ExprVal, ValRange);
6098 if (Val)
6099 ImpliedUserSGPRCount += 2;
6100 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6101 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6102 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6103 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6104 ValRange);
6105 if (Val)
6106 ImpliedUserSGPRCount += 2;
6107 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6108 if (hasArchitectedFlatScratch())
6109 return Error(IDRange.Start,
6110 "directive is not supported with architected flat scratch",
6111 IDRange);
6112 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6113 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6114 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6115 ExprVal, ValRange);
6116 if (Val)
6117 ImpliedUserSGPRCount += 2;
6118 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6119 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6120 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6121 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6122 ExprVal, ValRange);
6123 if (Val)
6124 ImpliedUserSGPRCount += 1;
6125 } else if (ID == ".amdhsa_wavefront_size32") {
6126 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6127 if (IVersion.Major < 10)
6128 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6129 EnableWavefrontSize32 = Val;
6130 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6131 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6132 ValRange);
6133 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6134 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6135 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6136 ValRange);
6137 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6138 if (hasArchitectedFlatScratch())
6139 return Error(IDRange.Start,
6140 "directive is not supported with architected flat scratch",
6141 IDRange);
6142 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6143 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6144 ValRange);
6145 } else if (ID == ".amdhsa_enable_private_segment") {
6146 if (!hasArchitectedFlatScratch())
6147 return Error(
6148 IDRange.Start,
6149 "directive is not supported without architected flat scratch",
6150 IDRange);
6151 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6152 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6153 ValRange);
6154 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6155 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6156 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6157 ValRange);
6158 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6159 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6160 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6161 ValRange);
6162 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6163 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6164 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6165 ValRange);
6166 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6167 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6168 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6169 ValRange);
6170 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6171 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6172 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6173 ValRange);
6174 } else if (ID == ".amdhsa_next_free_vgpr") {
6175 VGPRRange = ValRange;
6176 NextFreeVGPR = ExprVal;
6177 } else if (ID == ".amdhsa_next_free_sgpr") {
6178 SGPRRange = ValRange;
6179 NextFreeSGPR = ExprVal;
6180 } else if (ID == ".amdhsa_accum_offset") {
6181 if (!isGFX90A())
6182 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6183 AccumOffset = ExprVal;
6184 } else if (ID == ".amdhsa_named_barrier_count") {
6185 if (!isGFX1250())
6186 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6187 NamedBarCnt = ExprVal;
6188 } else if (ID == ".amdhsa_reserve_vcc") {
6189 if (EvaluatableExpr && !isUInt<1>(Val))
6190 return OutOfRangeError(ValRange);
6191 ReserveVCC = ExprVal;
6192 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6193 if (IVersion.Major < 7)
6194 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6195 if (hasArchitectedFlatScratch())
6196 return Error(IDRange.Start,
6197 "directive is not supported with architected flat scratch",
6198 IDRange);
6199 if (EvaluatableExpr && !isUInt<1>(Val))
6200 return OutOfRangeError(ValRange);
6201 ReserveFlatScr = ExprVal;
6202 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6203 if (IVersion.Major < 8)
6204 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6205 if (!isUInt<1>(Val))
6206 return OutOfRangeError(ValRange);
6207 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6208 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6209 IDRange);
6210 } else if (ID == ".amdhsa_float_round_mode_32") {
6211 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6212 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6213 ValRange);
6214 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6215 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6216 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6217 ValRange);
6218 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6219 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6220 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6221 ValRange);
6222 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6223 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6224 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6225 ValRange);
6226 } else if (ID == ".amdhsa_dx10_clamp") {
6227 if (IVersion.Major >= 12)
6228 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6229 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6230 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6231 ValRange);
6232 } else if (ID == ".amdhsa_ieee_mode") {
6233 if (IVersion.Major >= 12)
6234 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6235 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6236 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6237 ValRange);
6238 } else if (ID == ".amdhsa_fp16_overflow") {
6239 if (IVersion.Major < 9)
6240 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6241 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6242 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6243 ValRange);
6244 } else if (ID == ".amdhsa_tg_split") {
6245 if (!isGFX90A())
6246 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6247 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6248 ExprVal, ValRange);
6249 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6250 if (!supportsWGP(getSTI()))
6251 return Error(IDRange.Start,
6252 "directive unsupported on " + getSTI().getCPU(), IDRange);
6253 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6254 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6255 ValRange);
6256 } else if (ID == ".amdhsa_memory_ordered") {
6257 if (IVersion.Major < 10)
6258 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6259 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6260 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6261 ValRange);
6262 } else if (ID == ".amdhsa_forward_progress") {
6263 if (IVersion.Major < 10)
6264 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6265 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6266 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6267 ValRange);
6268 } else if (ID == ".amdhsa_shared_vgpr_count") {
6269 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6270 if (IVersion.Major < 10 || IVersion.Major >= 12)
6271 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6272 IDRange);
6273 SharedVGPRCount = Val;
6274 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6275 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6276 ValRange);
6277 } else if (ID == ".amdhsa_inst_pref_size") {
6278 if (IVersion.Major < 11)
6279 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6280 if (IVersion.Major == 11) {
6281 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6282 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6283 ValRange);
6284 } else {
6285 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6286 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6287 ValRange);
6288 }
6289 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6290 PARSE_BITS_ENTRY(
6291 KD.compute_pgm_rsrc2,
6292 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6293 ExprVal, ValRange);
6294 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6295 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6296 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6297 ExprVal, ValRange);
6298 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6299 PARSE_BITS_ENTRY(
6300 KD.compute_pgm_rsrc2,
6301 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6302 ExprVal, ValRange);
6303 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6304 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6305 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6306 ExprVal, ValRange);
6307 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6308 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6309 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6310 ExprVal, ValRange);
6311 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6312 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6313 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6314 ExprVal, ValRange);
6315 } else if (ID == ".amdhsa_exception_int_div_zero") {
6316 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6317 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6318 ExprVal, ValRange);
6319 } else if (ID == ".amdhsa_round_robin_scheduling") {
6320 if (IVersion.Major < 12)
6321 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6322 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6323 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6324 ValRange);
6325 } else {
6326 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6327 }
6328
6329#undef PARSE_BITS_ENTRY
6330 }
6331
6332 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6333 return TokError(".amdhsa_next_free_vgpr directive is required");
6334
6335 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6336 return TokError(".amdhsa_next_free_sgpr directive is required");
6337
6338 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6339
6340 // Consider the case where the total number of user SGPRs (including
6341 // trailing allocated preload SGPRs) is greater than the number of
6342 // explicitly referenced SGPRs.
6343 if (PreloadLength) {
6344 MCContext &Ctx = getContext();
6345 NextFreeSGPR = AMDGPUMCExpr::createMax(
6346 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6347 }
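// Editorial note, not part of the upstream source: an assumed example of the
// adjustment above. If kernarg preloading raises UserSGPRCount to 16 while
// .amdhsa_next_free_sgpr was written as 10, the max() raises NextFreeSGPR to
// 16 so the trailing preload SGPRs are included in the allocation.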
6348
6349 const MCExpr *VGPRBlocks;
6350 const MCExpr *SGPRBlocks;
6351 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6352 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6353 EnableWavefrontSize32, NextFreeVGPR,
6354 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6355 SGPRBlocks))
6356 return true;
6357
6358 int64_t EvaluatedVGPRBlocks;
6359 bool VGPRBlocksEvaluatable =
6360 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6361 if (VGPRBlocksEvaluatable &&
6362 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6363 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6364 return OutOfRangeError(VGPRRange);
6365 }
6366 AMDGPU::MCKernelDescriptor::bits_set(
6367 KD.compute_pgm_rsrc1, VGPRBlocks,
6368 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6369 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6370
6371 int64_t EvaluatedSGPRBlocks;
6372 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6373 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6374 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6375 return OutOfRangeError(SGPRRange);
6376 AMDGPU::MCKernelDescriptor::bits_set(
6377 KD.compute_pgm_rsrc1, SGPRBlocks,
6378 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6379 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6380
6381 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6382 return TokError("amdgpu_user_sgpr_count smaller than than implied by "
6383 "enabled user SGPRs");
6384
6385 if (isGFX1250()) {
6387 return TokError("too many user SGPRs enabled");
6388 AMDGPU::MCKernelDescriptor::bits_set(
6389 KD.compute_pgm_rsrc2,
6390 MCConstantExpr::create(UserSGPRCount, getContext()),
6391 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6392 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6393 } else {
6394 if (!isUInt<COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_WIDTH>(
6395 UserSGPRCount))
6396 return TokError("too many user SGPRs enabled");
6397 AMDGPU::MCKernelDescriptor::bits_set(
6398 KD.compute_pgm_rsrc2,
6399 MCConstantExpr::create(UserSGPRCount, getContext()),
6400 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6401 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6402 }
6403
6404 int64_t IVal = 0;
6405 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6406 return TokError("Kernarg size should be resolvable");
6407 uint64_t kernarg_size = IVal;
6408 if (PreloadLength && kernarg_size &&
6409 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6410 return TokError("Kernarg preload length + offset is larger than the "
6411 "kernarg segment size");
6412
6413 if (isGFX90A()) {
6414 if (!Seen.contains(".amdhsa_accum_offset"))
6415 return TokError(".amdhsa_accum_offset directive is required");
6416 int64_t EvaluatedAccum;
6417 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6418 uint64_t UEvaluatedAccum = EvaluatedAccum;
6419 if (AccumEvaluatable &&
6420 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6421 return TokError("accum_offset should be in range [4..256] in "
6422 "increments of 4");
6423
6424 int64_t EvaluatedNumVGPR;
6425 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6426 AccumEvaluatable &&
6427 UEvaluatedAccum >
6428 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6429 return TokError("accum_offset exceeds total VGPR allocation");
6430 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6431 MCBinaryExpr::createDiv(
6432 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6433 MCConstantExpr::create(1, getContext()), getContext());
6434 AMDGPU::MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
6435 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6436 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6437 getContext());
6438 }
6439
6440 if (isGFX1250())
6441 AMDGPU::MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, NamedBarCnt,
6442 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6443 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6444 getContext());
6445
6446 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6447 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
6448 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6449 return TokError("shared_vgpr_count directive not valid on "
6450 "wavefront size 32");
6451 }
6452
6453 if (VGPRBlocksEvaluatable &&
6454 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6455 63)) {
6456 return TokError("shared_vgpr_count*2 + "
6457 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6458 "exceed 63\n");
6459 }
6460 }
6461
6462 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6463 NextFreeVGPR, NextFreeSGPR,
6464 ReserveVCC, ReserveFlatScr);
6465 return false;
6466}
6467
6468bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6469 uint32_t Version;
6470 if (ParseAsAbsoluteExpression(Version))
6471 return true;
6472
6473 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6474 return false;
6475}
6476
6477bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6478 AMDGPUMCKernelCodeT &C) {
6479 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6480 // assembly for backwards compatibility.
6481 if (ID == "max_scratch_backing_memory_byte_size") {
6482 Parser.eatToEndOfStatement();
6483 return false;
6484 }
6485
6486 SmallString<40> ErrStr;
6487 raw_svector_ostream Err(ErrStr);
6488 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6489 return TokError(Err.str());
6490 }
6491 Lex();
6492
6493 if (ID == "enable_wavefront_size32") {
6494 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6495 if (!isGFX10Plus())
6496 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6497 if (!isWave32())
6498 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6499 } else {
6500 if (!isWave64())
6501 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6502 }
6503 }
6504
6505 if (ID == "wavefront_size") {
6506 if (C.wavefront_size == 5) {
6507 if (!isGFX10Plus())
6508 return TokError("wavefront_size=5 is only allowed on GFX10+");
6509 if (!isWave32())
6510 return TokError("wavefront_size=5 requires +WavefrontSize32");
6511 } else if (C.wavefront_size == 6) {
6512 if (!isWave64())
6513 return TokError("wavefront_size=6 requires +WavefrontSize64");
6514 }
6515 }
6516
6517 return false;
6518}
6519
6520bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6521 AMDGPUMCKernelCodeT KernelCode;
6522 KernelCode.initDefault(&getSTI(), getContext());
6523
6524 while (true) {
6525 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6526 // will set the current token to EndOfStatement.
6527 while(trySkipToken(AsmToken::EndOfStatement));
6528
6529 StringRef ID;
6530 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6531 return true;
6532
6533 if (ID == ".end_amd_kernel_code_t")
6534 break;
6535
6536 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6537 return true;
6538 }
6539
6540 KernelCode.validate(&getSTI(), getContext());
6541 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6542
6543 return false;
6544}
6545
6546bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6547 StringRef KernelName;
6548 if (!parseId(KernelName, "expected symbol name"))
6549 return true;
6550
6551 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6552 ELF::STT_AMDGPU_HSA_KERNEL);
6553
6554 KernelScope.initialize(getContext());
6555 return false;
6556}
6557
6558bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6559 if (!getSTI().getTargetTriple().isAMDGCN()) {
6560 return Error(getLoc(),
6561 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6562 "architectures");
6563 }
6564
6565 auto TargetIDDirective = getLexer().getTok().getStringContents();
6566 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6567 return Error(getParser().getTok().getLoc(), "target id must match options");
6568
6569 getTargetStreamer().EmitISAVersion();
6570 Lex();
6571
6572 return false;
6573}
6574
6575bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6576 assert(isHsaAbi(getSTI()));
6577
6578 std::string HSAMetadataString;
6579 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6580 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6581 return true;
6582
6583 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6584 return Error(getLoc(), "invalid HSA metadata");
6585
6586 return false;
6587}
6588
6589/// Common code to parse out a block of text (typically YAML) between start and
6590/// end directives.
6591bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6592 const char *AssemblerDirectiveEnd,
6593 std::string &CollectString) {
6594
6595 raw_string_ostream CollectStream(CollectString);
6596
6597 getLexer().setSkipSpace(false);
6598
6599 bool FoundEnd = false;
6600 while (!isToken(AsmToken::Eof)) {
6601 while (isToken(AsmToken::Space)) {
6602 CollectStream << getTokenStr();
6603 Lex();
6604 }
6605
6606 if (trySkipId(AssemblerDirectiveEnd)) {
6607 FoundEnd = true;
6608 break;
6609 }
6610
6611 CollectStream << Parser.parseStringToEndOfStatement()
6612 << getContext().getAsmInfo()->getSeparatorString();
6613
6614 Parser.eatToEndOfStatement();
6615 }
6616
6617 getLexer().setSkipSpace(true);
6618
6619 if (isToken(AsmToken::Eof) && !FoundEnd) {
6620 return TokError(Twine("expected directive ") +
6621 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6622 }
6623
6624 return false;
6625}
6626
6627/// Parse the assembler directive for new MsgPack-format PAL metadata.
6628bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6629 std::string String;
6630 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6631 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6632 return true;
6633
6634 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6635 if (!PALMetadata->setFromString(String))
6636 return Error(getLoc(), "invalid PAL metadata");
6637 return false;
6638}
6639
6640/// Parse the assembler directive for old linear-format PAL metadata.
6641bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6642 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6643 return Error(getLoc(),
6644 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6645 "not available on non-amdpal OSes")).str());
6646 }
6647
6648 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6649 PALMetadata->setLegacy();
6650 for (;;) {
6651 uint32_t Key, Value;
6652 if (ParseAsAbsoluteExpression(Key)) {
6653 return TokError(Twine("invalid value in ") +
6654 Twine(PALMD::AssemblerDirective));
6655 }
6656 if (!trySkipToken(AsmToken::Comma)) {
6657 return TokError(Twine("expected an even number of values in ") +
6658 Twine(PALMD::AssemblerDirective));
6659 }
6660 if (ParseAsAbsoluteExpression(Value)) {
6661 return TokError(Twine("invalid value in ") +
6662 Twine(PALMD::AssemblerDirective));
6663 }
6664 PALMetadata->setRegister(Key, Value);
6665 if (!trySkipToken(AsmToken::Comma))
6666 break;
6667 }
6668 return false;
6669}
6670
6671/// ParseDirectiveAMDGPULDS
6672/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
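/// Editorial example, not part of the upstream source (the symbol name is
/// hypothetical):
///   .amdgpu_lds lds_buffer, 4096, 16
/// declares a 4096-byte LDS symbol aligned to 16 bytes; when the alignment
/// operand is omitted it defaults to 4.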
6673bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6674 if (getParser().checkForValidSection())
6675 return true;
6676
6677 StringRef Name;
6678 SMLoc NameLoc = getLoc();
6679 if (getParser().parseIdentifier(Name))
6680 return TokError("expected identifier in directive");
6681
6682 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6683 if (getParser().parseComma())
6684 return true;
6685
6686 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6687
6688 int64_t Size;
6689 SMLoc SizeLoc = getLoc();
6690 if (getParser().parseAbsoluteExpression(Size))
6691 return true;
6692 if (Size < 0)
6693 return Error(SizeLoc, "size must be non-negative");
6694 if (Size > LocalMemorySize)
6695 return Error(SizeLoc, "size is too large");
6696
6697 int64_t Alignment = 4;
6698 if (trySkipToken(AsmToken::Comma)) {
6699 SMLoc AlignLoc = getLoc();
6700 if (getParser().parseAbsoluteExpression(Alignment))
6701 return true;
6702 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6703 return Error(AlignLoc, "alignment must be a power of two");
6704
6705 // Alignment larger than the size of LDS is possible in theory, as long
6706 // as the linker manages to place the symbol at address 0, but we do want
6707 // to make sure the alignment fits nicely into a 32-bit integer.
6708 if (Alignment >= 1u << 31)
6709 return Error(AlignLoc, "alignment is too large");
6710 }
6711
6712 if (parseEOL())
6713 return true;
6714
6715 Symbol->redefineIfPossible();
6716 if (!Symbol->isUndefined())
6717 return Error(NameLoc, "invalid symbol redefinition");
6718
6719 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6720 return false;
6721}
6722
6723bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6724 StringRef IDVal = DirectiveID.getString();
6725
6726 if (isHsaAbi(getSTI())) {
6727 if (IDVal == ".amdhsa_kernel")
6728 return ParseDirectiveAMDHSAKernel();
6729
6730 if (IDVal == ".amdhsa_code_object_version")
6731 return ParseDirectiveAMDHSACodeObjectVersion();
6732
6733 // TODO: Restructure/combine with PAL metadata directive.
6734 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6735 return ParseDirectiveHSAMetadata();
6736 } else {
6737 if (IDVal == ".amd_kernel_code_t")
6738 return ParseDirectiveAMDKernelCodeT();
6739
6740 if (IDVal == ".amdgpu_hsa_kernel")
6741 return ParseDirectiveAMDGPUHsaKernel();
6742
6743 if (IDVal == ".amd_amdgpu_isa")
6744 return ParseDirectiveISAVersion();
6745
6746 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6747 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6748 Twine(" directive is "
6749 "not available on non-amdhsa OSes"))
6750 .str());
6751 }
6752 }
6753
6754 if (IDVal == ".amdgcn_target")
6755 return ParseDirectiveAMDGCNTarget();
6756
6757 if (IDVal == ".amdgpu_lds")
6758 return ParseDirectiveAMDGPULDS();
6759
6760 if (IDVal == PALMD::AssemblerDirectiveBegin)
6761 return ParseDirectivePALMetadataBegin();
6762
6763 if (IDVal == PALMD::AssemblerDirective)
6764 return ParseDirectivePALMetadata();
6765
6766 return true;
6767}
6768
6769bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6770 MCRegister Reg) {
6771 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6772 return isGFX9Plus();
6773
6774 // GFX10+ has 2 more SGPRs 104 and 105.
6775 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6776 return hasSGPR104_SGPR105();
6777
6778 switch (Reg.id()) {
6779 case SRC_SHARED_BASE_LO:
6780 case SRC_SHARED_BASE:
6781 case SRC_SHARED_LIMIT_LO:
6782 case SRC_SHARED_LIMIT:
6783 case SRC_PRIVATE_BASE_LO:
6784 case SRC_PRIVATE_BASE:
6785 case SRC_PRIVATE_LIMIT_LO:
6786 case SRC_PRIVATE_LIMIT:
6787 return isGFX9Plus();
6788 case SRC_FLAT_SCRATCH_BASE_LO:
6789 case SRC_FLAT_SCRATCH_BASE_HI:
6790 return hasGloballyAddressableScratch();
6791 case SRC_POPS_EXITING_WAVE_ID:
6792 return isGFX9Plus() && !isGFX11Plus();
6793 case TBA:
6794 case TBA_LO:
6795 case TBA_HI:
6796 case TMA:
6797 case TMA_LO:
6798 case TMA_HI:
6799 return !isGFX9Plus();
6800 case XNACK_MASK:
6801 case XNACK_MASK_LO:
6802 case XNACK_MASK_HI:
6803 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6804 case SGPR_NULL:
6805 return isGFX10Plus();
6806 case SRC_EXECZ:
6807 case SRC_VCCZ:
6808 return !isGFX11Plus();
6809 default:
6810 break;
6811 }
6812
6813 if (isCI())
6814 return true;
6815
6816 if (isSI() || isGFX10Plus()) {
6817 // No flat_scr on SI.
6818 // On GFX10Plus flat scratch is not a valid register operand and can only be
6819 // accessed with s_setreg/s_getreg.
6820 switch (Reg.id()) {
6821 case FLAT_SCR:
6822 case FLAT_SCR_LO:
6823 case FLAT_SCR_HI:
6824 return false;
6825 default:
6826 return true;
6827 }
6828 }
6829
6830 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6831 // SI/CI have.
6832 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6833 return hasSGPR102_SGPR103();
6834
6835 return true;
6836}
6837
6838ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6839 StringRef Mnemonic,
6840 OperandMode Mode) {
6841 ParseStatus Res = parseVOPD(Operands);
6842 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6843 return Res;
6844
6845 // Try to parse with a custom parser
6846 Res = MatchOperandParserImpl(Operands, Mnemonic);
6847
6848 // If we successfully parsed the operand or if there was an error parsing,
6849 // we are done.
6850 //
6851 // If we are parsing after we reach EndOfStatement then this means we
6852 // are appending default values to the Operands list. This is only done
6853 // by custom parser, so we shouldn't continue on to the generic parsing.
6854 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6855 return Res;
6856
6857 SMLoc RBraceLoc;
6858 SMLoc LBraceLoc = getLoc();
6859 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6860 unsigned Prefix = Operands.size();
6861
6862 for (;;) {
6863 auto Loc = getLoc();
6864 Res = parseReg(Operands);
6865 if (Res.isNoMatch())
6866 Error(Loc, "expected a register");
6867 if (!Res.isSuccess())
6868 return ParseStatus::Failure;
6869
6870 RBraceLoc = getLoc();
6871 if (trySkipToken(AsmToken::RBrac))
6872 break;
6873
6874 if (!skipToken(AsmToken::Comma,
6875 "expected a comma or a closing square bracket"))
6876 return ParseStatus::Failure;
6877 }
6878
6879 if (Operands.size() - Prefix > 1) {
6880 Operands.insert(Operands.begin() + Prefix,
6881 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6882 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6883 }
6884
6885 return ParseStatus::Success;
6886 }
6887
6888 return parseRegOrImm(Operands);
6889}
6890
6891StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6892 // Clear any forced encodings from the previous instruction.
6893 setForcedEncodingSize(0);
6894 setForcedDPP(false);
6895 setForcedSDWA(false);
6896
6897 if (Name.consume_back("_e64_dpp")) {
6898 setForcedDPP(true);
6899 setForcedEncodingSize(64);
6900 return Name;
6901 }
6902 if (Name.consume_back("_e64")) {
6903 setForcedEncodingSize(64);
6904 return Name;
6905 }
6906 if (Name.consume_back("_e32")) {
6907 setForcedEncodingSize(32);
6908 return Name;
6909 }
6910 if (Name.consume_back("_dpp")) {
6911 setForcedDPP(true);
6912 return Name;
6913 }
6914 if (Name.consume_back("_sdwa")) {
6915 setForcedSDWA(true);
6916 return Name;
6917 }
6918 return Name;
6919}
6920
6921static void applyMnemonicAliases(StringRef &Mnemonic,
6922 const FeatureBitset &Features,
6923 unsigned VariantID);
6924
6925bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6926 StringRef Name, SMLoc NameLoc,
6927 OperandVector &Operands) {
6928 // Add the instruction mnemonic
6929 Name = parseMnemonicSuffix(Name);
6930
6931 // If the target architecture uses MnemonicAlias, call it here to parse
6932 // operands correctly.
6933 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6934
6935 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6936
6937 bool IsMIMG = Name.starts_with("image_");
6938
6939 while (!trySkipToken(AsmToken::EndOfStatement)) {
6940 OperandMode Mode = OperandMode_Default;
6941 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6942 Mode = OperandMode_NSA;
6943 ParseStatus Res = parseOperand(Operands, Name, Mode);
6944
6945 if (!Res.isSuccess()) {
6946 checkUnsupportedInstruction(Name, NameLoc);
6947 if (!Parser.hasPendingError()) {
6948 // FIXME: use real operand location rather than the current location.
6949 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6950 : "not a valid operand.";
6951 Error(getLoc(), Msg);
6952 }
6953 while (!trySkipToken(AsmToken::EndOfStatement)) {
6954 lex();
6955 }
6956 return true;
6957 }
6958
6959 // Eat the comma or space if there is one.
6960 trySkipToken(AsmToken::Comma);
6961 }
6962
6963 return false;
6964}
6965
6966//===----------------------------------------------------------------------===//
6967// Utility functions
6968//===----------------------------------------------------------------------===//
6969
6970ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6971 OperandVector &Operands) {
6972 SMLoc S = getLoc();
6973 if (!trySkipId(Name))
6974 return ParseStatus::NoMatch;
6975
6976 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6977 return ParseStatus::Success;
6978}
6979
6980ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6981 int64_t &IntVal) {
6982
6983 if (!trySkipId(Prefix, AsmToken::Colon))
6984 return ParseStatus::NoMatch;
6985
6987}
6988
6989ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6990 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6991 std::function<bool(int64_t &)> ConvertResult) {
6992 SMLoc S = getLoc();
6993 int64_t Value = 0;
6994
6995 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6996 if (!Res.isSuccess())
6997 return Res;
6998
6999 if (ConvertResult && !ConvertResult(Value)) {
7000 Error(S, "invalid " + StringRef(Prefix) + " value.");
7001 }
7002
7003 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
7004 return ParseStatus::Success;
7005}
7006
7007ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7008 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7009 bool (*ConvertResult)(int64_t &)) {
7010 SMLoc S = getLoc();
7011 if (!trySkipId(Prefix, AsmToken::Colon))
7012 return ParseStatus::NoMatch;
7013
7014 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
7015 return ParseStatus::Failure;
7016
7017 unsigned Val = 0;
7018 const unsigned MaxSize = 4;
7019
7020 // FIXME: How to verify the number of elements matches the number of src
7021 // operands?
7022 for (int I = 0; ; ++I) {
7023 int64_t Op;
7024 SMLoc Loc = getLoc();
7025 if (!parseExpr(Op))
7026 return ParseStatus::Failure;
7027
7028 if (Op != 0 && Op != 1)
7029 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7030
7031 Val |= (Op << I);
7032
7033 if (trySkipToken(AsmToken::RBrac))
7034 break;
7035
7036 if (I + 1 == MaxSize)
7037 return Error(getLoc(), "expected a closing square bracket");
7038
7039 if (!skipToken(AsmToken::Comma, "expected a comma"))
7040 return ParseStatus::Failure;
7041 }
7042
7043 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7044 return ParseStatus::Success;
7045}
7046
7047ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7048 OperandVector &Operands,
7049 AMDGPUOperand::ImmTy ImmTy) {
7050 int64_t Bit;
7051 SMLoc S = getLoc();
7052
7053 if (trySkipId(Name)) {
7054 Bit = 1;
7055 } else if (trySkipId("no", Name)) {
7056 Bit = 0;
7057 } else {
7058 return ParseStatus::NoMatch;
7059 }
7060
7061 if (Name == "r128" && !hasMIMG_R128())
7062 return Error(S, "r128 modifier is not supported on this GPU");
7063 if (Name == "a16" && !hasA16())
7064 return Error(S, "a16 modifier is not supported on this GPU");
7065
7066 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7067 ImmTy = AMDGPUOperand::ImmTyR128A16;
7068
7069 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7070 return ParseStatus::Success;
7071}
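// A named bit is written bare to set it (e.g. "tfe", "a16", "gds") or with a
// "no" prefix to clear it explicitly (e.g. "notfe"). Illustrative syntax:
//   image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D a16 tfe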
7072
7073unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7074 bool &Disabling) const {
7075 Disabling = Id.consume_front("no");
7076
7077 if (isGFX940() && !Mnemo.starts_with("s_")) {
7078 return StringSwitch<unsigned>(Id)
7079 .Case("nt", AMDGPU::CPol::NT)
7080 .Case("sc0", AMDGPU::CPol::SC0)
7081 .Case("sc1", AMDGPU::CPol::SC1)
7082 .Default(0);
7083 }
7084
7085 return StringSwitch<unsigned>(Id)
7086 .Case("dlc", AMDGPU::CPol::DLC)
7087 .Case("glc", AMDGPU::CPol::GLC)
7088 .Case("scc", AMDGPU::CPol::SCC)
7089 .Case("slc", AMDGPU::CPol::SLC)
7090 .Default(0);
7091}
7092
7093ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7094 if (isGFX12Plus()) {
7095 SMLoc StringLoc = getLoc();
7096
7097 int64_t CPolVal = 0;
7098 ParseStatus ResTH = ParseStatus::NoMatch;
7099 ParseStatus ResScope = ParseStatus::NoMatch;
7100 ParseStatus ResNV = ParseStatus::NoMatch;
7101 ParseStatus ResScal = ParseStatus::NoMatch;
7102
7103 for (;;) {
7104 if (ResTH.isNoMatch()) {
7105 int64_t TH;
7106 ResTH = parseTH(Operands, TH);
7107 if (ResTH.isFailure())
7108 return ResTH;
7109 if (ResTH.isSuccess()) {
7110 CPolVal |= TH;
7111 continue;
7112 }
7113 }
7114
7115 if (ResScope.isNoMatch()) {
7116 int64_t Scope;
7117 ResScope = parseScope(Operands, Scope);
7118 if (ResScope.isFailure())
7119 return ResScope;
7120 if (ResScope.isSuccess()) {
7121 CPolVal |= Scope;
7122 continue;
7123 }
7124 }
7125
7126 // The NV bit exists on GFX12+, but only has an effect starting with
7127 // GFX1250. Allow parsing on all GFX12 and reject it during validation
7128 // for better diagnostics.
7129 if (ResNV.isNoMatch()) {
7130 if (trySkipId("nv")) {
7131 ResNV = ParseStatus::Success;
7132 CPolVal |= CPol::NV;
7133 continue;
7134 } else if (trySkipId("no", "nv")) {
7135 ResNV = ParseStatus::Success;
7136 continue;
7137 }
7138 }
7139
7140 if (ResScal.isNoMatch()) {
7141 if (trySkipId("scale_offset")) {
7142 ResScal = ParseStatus::Success;
7143 CPolVal |= CPol::SCAL;
7144 continue;
7145 } else if (trySkipId("no", "scale_offset")) {
7146 ResScal = ParseStatus::Success;
7147 continue;
7148 }
7149 }
7150
7151 break;
7152 }
7153
7154 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7155 ResScal.isNoMatch())
7156 return ParseStatus::NoMatch;
7157
7158 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7159 AMDGPUOperand::ImmTyCPol));
7160 return ParseStatus::Success;
7161 }
7162
7163 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7164 SMLoc OpLoc = getLoc();
7165 unsigned Enabled = 0, Seen = 0;
7166 for (;;) {
7167 SMLoc S = getLoc();
7168 bool Disabling;
7169 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7170 if (!CPol)
7171 break;
7172
7173 lex();
7174
7175 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7176 return Error(S, "dlc modifier is not supported on this GPU");
7177
7178 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7179 return Error(S, "scc modifier is not supported on this GPU");
7180
7181 if (Seen & CPol)
7182 return Error(S, "duplicate cache policy modifier");
7183
7184 if (!Disabling)
7185 Enabled |= CPol;
7186
7187 Seen |= CPol;
7188 }
7189
7190 if (!Seen)
7191 return ParseStatus::NoMatch;
7192
7193 Operands.push_back(
7194 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7195 return ParseStatus::Success;
7196}
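// Cache-policy syntax differs by generation; illustrative examples (exact
// mnemonics depend on the subtarget):
//   pre-GFX12:  buffer_load_dword v0, off, s[0:3], 0 glc slc
//   GFX940:     flat_load_dword v0, v[2:3] sc0 sc1 nt
//   GFX12+:     global_load_b32 v0, v[2:3], off th:TH_LOAD_NT scope:SCOPE_SE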
7197
7198ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7199 int64_t &Scope) {
7200 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7201 CPol::SCOPE_DEV, CPol::SCOPE_SYS};
7202
7203 ParseStatus Res = parseStringOrIntWithPrefix(
7204 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7205 Scope);
7206
7207 if (Res.isSuccess())
7208 Scope = Scopes[Scope];
7209
7210 return Res;
7211}
7212
7213ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7214 TH = AMDGPU::CPol::TH_RT; // default
7215
7216 StringRef Value;
7217 SMLoc StringLoc;
7218 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7219 if (!Res.isSuccess())
7220 return Res;
7221
7222 if (Value == "TH_DEFAULT")
7223 TH = AMDGPU::CPol::TH_RT;
7224 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7225 Value == "TH_LOAD_NT_WB") {
7226 return Error(StringLoc, "invalid th value");
7227 } else if (Value.consume_front("TH_ATOMIC_")) {
7228 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
7229 } else if (Value.consume_front("TH_LOAD_")) {
7230 TH = AMDGPU::CPol::TH_TYPE_LOAD;
7231 } else if (Value.consume_front("TH_STORE_")) {
7232 TH = AMDGPU::CPol::TH_TYPE_STORE;
7233 } else {
7234 return Error(StringLoc, "invalid th value");
7235 }
7236
7237 if (Value == "BYPASS")
7238 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
7239
7240 if (TH != 0) {
7241 if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
7242 TH |= StringSwitch<int64_t>(Value)
7243 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7244 .Case("RT", AMDGPU::CPol::TH_RT)
7245 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7246 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7247 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7248 AMDGPU::CPol::TH_ATOMIC_RETURN)
7249 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7250 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7251 AMDGPU::CPol::TH_ATOMIC_NT)
7252 .Default(0xffffffff);
7253 else
7254 TH |= StringSwitch<int64_t>(Value)
7255 .Case("RT", AMDGPU::CPol::TH_RT)
7256 .Case("NT", AMDGPU::CPol::TH_NT)
7257 .Case("HT", AMDGPU::CPol::TH_HT)
7258 .Case("LU", AMDGPU::CPol::TH_LU)
7259 .Case("WB", AMDGPU::CPol::TH_WB)
7260 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7261 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7262 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7263 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7264 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7265 .Default(0xffffffff);
7266 }
7267
7268 if (TH == 0xffffffff)
7269 return Error(StringLoc, "invalid th value");
7270
7271 return ParseStatus::Success;
7272}
7273
7274static void
7275 addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
7276 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7277 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7278 std::optional<unsigned> InsertAt = std::nullopt) {
7279 auto i = OptionalIdx.find(ImmT);
7280 if (i != OptionalIdx.end()) {
7281 unsigned Idx = i->second;
7282 const AMDGPUOperand &Op =
7283 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7284 if (InsertAt)
7285 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7286 else
7287 Op.addImmOperands(Inst, 1);
7288 } else {
7289 if (InsertAt.has_value())
7290 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7291 else
7292 Inst.addOperand(MCOperand::createImm(Default));
7293 }
7294}
7295
7296ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7297 StringRef &Value,
7298 SMLoc &StringLoc) {
7299 if (!trySkipId(Prefix, AsmToken::Colon))
7300 return ParseStatus::NoMatch;
7301
7302 StringLoc = getLoc();
7303 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7304 : ParseStatus::Failure;
7305}
7306
7307ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7308 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7309 int64_t &IntVal) {
7310 if (!trySkipId(Name, AsmToken::Colon))
7311 return ParseStatus::NoMatch;
7312
7313 SMLoc StringLoc = getLoc();
7314
7315 StringRef Value;
7316 if (isToken(AsmToken::Identifier)) {
7317 Value = getTokenStr();
7318 lex();
7319
7320 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7321 if (Value == Ids[IntVal])
7322 break;
7323 } else if (!parseExpr(IntVal))
7324 return ParseStatus::Failure;
7325
7326 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7327 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7328
7329 return ParseStatus::Success;
7330}
7331
7332ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7333 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7334 AMDGPUOperand::ImmTy Type) {
7335 SMLoc S = getLoc();
7336 int64_t IntVal;
7337
7338 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7339 if (Res.isSuccess())
7340 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7341
7342 return Res;
7343}
7344
7345//===----------------------------------------------------------------------===//
7346// MTBUF format
7347//===----------------------------------------------------------------------===//
7348
7349bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7350 int64_t MaxVal,
7351 int64_t &Fmt) {
7352 int64_t Val;
7353 SMLoc Loc = getLoc();
7354
7355 auto Res = parseIntWithPrefix(Pref, Val);
7356 if (Res.isFailure())
7357 return false;
7358 if (Res.isNoMatch())
7359 return true;
7360
7361 if (Val < 0 || Val > MaxVal) {
7362 Error(Loc, Twine("out of range ", StringRef(Pref)));
7363 return false;
7364 }
7365
7366 Fmt = Val;
7367 return true;
7368}
7369
7370ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7371 AMDGPUOperand::ImmTy ImmTy) {
7372 const char *Pref = "index_key";
7373 int64_t ImmVal = 0;
7374 SMLoc Loc = getLoc();
7375 auto Res = parseIntWithPrefix(Pref, ImmVal);
7376 if (!Res.isSuccess())
7377 return Res;
7378
7379 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7380 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7381 (ImmVal < 0 || ImmVal > 1))
7382 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7383
7384 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7385 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7386
7387 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7388 return ParseStatus::Success;
7389}
7390
7391ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7392 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7393}
7394
7395ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7396 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7397}
7398
7399ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7400 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7401}
7402
7403ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7404 StringRef Name,
7405 AMDGPUOperand::ImmTy Type) {
7406 return parseStringOrIntWithPrefix(Operands, Name,
7407 {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
7408 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
7409 "MATRIX_FMT_FP4"},
7410 Type);
7411}
7412
7413ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7414 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7415 AMDGPUOperand::ImmTyMatrixAFMT);
7416}
7417
7418ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7419 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7420 AMDGPUOperand::ImmTyMatrixBFMT);
7421}
7422
7423ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7424 StringRef Name,
7425 AMDGPUOperand::ImmTy Type) {
7426 return parseStringOrIntWithPrefix(
7427 Operands, Name, {"MATRIX_SCALE_ROW0", "MATRIX_SCALE_ROW1"}, Type);
7428}
7429
7430ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7431 return tryParseMatrixScale(Operands, "matrix_a_scale",
7432 AMDGPUOperand::ImmTyMatrixAScale);
7433}
7434
7435ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7436 return tryParseMatrixScale(Operands, "matrix_b_scale",
7437 AMDGPUOperand::ImmTyMatrixBScale);
7438}
7439
7440ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7441 StringRef Name,
7442 AMDGPUOperand::ImmTy Type) {
7443 return parseStringOrIntWithPrefix(
7444 Operands, Name,
7445 {"MATRIX_SCALE_FMT_E8", "MATRIX_SCALE_FMT_E5M3", "MATRIX_SCALE_FMT_E4M3"},
7446 Type);
7447}
7448
7449ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7450 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7451 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7452}
7453
7454ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7455 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7456 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7457}
7458
7459// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7460// values to live in a joint format operand in the MCInst encoding.
7461ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7462 using namespace llvm::AMDGPU::MTBUFFormat;
7463
7464 int64_t Dfmt = DFMT_UNDEF;
7465 int64_t Nfmt = NFMT_UNDEF;
7466
7467 // dfmt and nfmt can appear in either order, and each is optional.
7468 for (int I = 0; I < 2; ++I) {
7469 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7470 return ParseStatus::Failure;
7471
7472 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7473 return ParseStatus::Failure;
7474
7475 // Skip optional comma between dfmt/nfmt
7476 // but guard against 2 commas following each other.
7477 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7478 !peekToken().is(AsmToken::Comma)) {
7479 trySkipToken(AsmToken::Comma);
7480 }
7481 }
7482
7483 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7484 return ParseStatus::NoMatch;
7485
7486 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7487 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7488
7489 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7490 return ParseStatus::Success;
7491}
7492
7493ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7494 using namespace llvm::AMDGPU::MTBUFFormat;
7495
7496 int64_t Fmt = UFMT_UNDEF;
7497
7498 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7499 return ParseStatus::Failure;
7500
7501 if (Fmt == UFMT_UNDEF)
7502 return ParseStatus::NoMatch;
7503
7504 Format = Fmt;
7505 return ParseStatus::Success;
7506}
7507
7508bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7509 int64_t &Nfmt,
7510 StringRef FormatStr,
7511 SMLoc Loc) {
7512 using namespace llvm::AMDGPU::MTBUFFormat;
7513 int64_t Format;
7514
7515 Format = getDfmt(FormatStr);
7516 if (Format != DFMT_UNDEF) {
7517 Dfmt = Format;
7518 return true;
7519 }
7520
7521 Format = getNfmt(FormatStr, getSTI());
7522 if (Format != NFMT_UNDEF) {
7523 Nfmt = Format;
7524 return true;
7525 }
7526
7527 Error(Loc, "unsupported format");
7528 return false;
7529}
7530
7531ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7532 SMLoc FormatLoc,
7533 int64_t &Format) {
7534 using namespace llvm::AMDGPU::MTBUFFormat;
7535
7536 int64_t Dfmt = DFMT_UNDEF;
7537 int64_t Nfmt = NFMT_UNDEF;
7538 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7539 return ParseStatus::Failure;
7540
7541 if (trySkipToken(AsmToken::Comma)) {
7542 StringRef Str;
7543 SMLoc Loc = getLoc();
7544 if (!parseId(Str, "expected a format string") ||
7545 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7546 return ParseStatus::Failure;
7547 if (Dfmt == DFMT_UNDEF)
7548 return Error(Loc, "duplicate numeric format");
7549 if (Nfmt == NFMT_UNDEF)
7550 return Error(Loc, "duplicate data format");
7551 }
7552
7553 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7554 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7555
7556 if (isGFX10Plus()) {
7557 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7558 if (Ufmt == UFMT_UNDEF)
7559 return Error(FormatLoc, "unsupported format");
7560 Format = Ufmt;
7561 } else {
7562 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7563 }
7564
7565 return ParseStatus::Success;
7566}
7567
7568ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7569 SMLoc Loc,
7570 int64_t &Format) {
7571 using namespace llvm::AMDGPU::MTBUFFormat;
7572
7573 auto Id = getUnifiedFormat(FormatStr, getSTI());
7574 if (Id == UFMT_UNDEF)
7575 return ParseStatus::NoMatch;
7576
7577 if (!isGFX10Plus())
7578 return Error(Loc, "unified format is not supported on this GPU");
7579
7580 Format = Id;
7581 return ParseStatus::Success;
7582}
7583
7584ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7585 using namespace llvm::AMDGPU::MTBUFFormat;
7586 SMLoc Loc = getLoc();
7587
7588 if (!parseExpr(Format))
7589 return ParseStatus::Failure;
7590 if (!isValidFormatEncoding(Format, getSTI()))
7591 return Error(Loc, "out of range format");
7592
7593 return ParseStatus::Success;
7594}
7595
7596ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7597 using namespace llvm::AMDGPU::MTBUFFormat;
7598
7599 if (!trySkipId("format", AsmToken::Colon))
7600 return ParseStatus::NoMatch;
7601
7602 if (trySkipToken(AsmToken::LBrac)) {
7603 StringRef FormatStr;
7604 SMLoc Loc = getLoc();
7605 if (!parseId(FormatStr, "expected a format string"))
7606 return ParseStatus::Failure;
7607
7608 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7609 if (Res.isNoMatch())
7610 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7611 if (!Res.isSuccess())
7612 return Res;
7613
7614 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7615 return ParseStatus::Failure;
7616
7617 return ParseStatus::Success;
7618 }
7619
7620 return parseNumericFormat(Format);
7621}
7622
7623ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7624 using namespace llvm::AMDGPU::MTBUFFormat;
7625
7626 int64_t Format = getDefaultFormatEncoding(getSTI());
7627 ParseStatus Res;
7628 SMLoc Loc = getLoc();
7629
7630 // Parse legacy format syntax.
7631 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7632 if (Res.isFailure())
7633 return Res;
7634
7635 bool FormatFound = Res.isSuccess();
7636
7637 Operands.push_back(
7638 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7639
7640 if (FormatFound)
7641 trySkipToken(AsmToken::Comma);
7642
7643 if (isToken(AsmToken::EndOfStatement)) {
7644 // We are expecting an soffset operand,
7645 // but let matcher handle the error.
7646 return ParseStatus::Success;
7647 }
7648
7649 // Parse soffset.
7650 Res = parseRegOrImm(Operands);
7651 if (!Res.isSuccess())
7652 return Res;
7653
7654 trySkipToken(AsmToken::Comma);
7655
7656 if (!FormatFound) {
7657 Res = parseSymbolicOrNumericFormat(Format);
7658 if (Res.isFailure())
7659 return Res;
7660 if (Res.isSuccess()) {
7661 auto Size = Operands.size();
7662 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7663 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7664 Op.setImm(Format);
7665 }
7666 return ParseStatus::Success;
7667 }
7668
7669 if (isId("format") && peekToken().is(AsmToken::Colon))
7670 return Error(getLoc(), "duplicate format");
7671 return ParseStatus::Success;
7672}
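// MTBUF format operands may be given numerically or symbolically; illustrative
// examples (format names are a sketch and vary by generation):
//   pre-GFX10: tbuffer_load_format_x v0, off, s[0:3], dfmt:4, nfmt:7, 0
//   GFX10+:    tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
//   split:     ... format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]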
7673
7674ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7675 ParseStatus Res =
7676 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7677 if (Res.isNoMatch()) {
7678 Res = parseIntWithPrefix("inst_offset", Operands,
7679 AMDGPUOperand::ImmTyInstOffset);
7680 }
7681 return Res;
7682}
7683
7684ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7685 ParseStatus Res =
7686 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7687 if (Res.isNoMatch())
7688 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7689 return Res;
7690}
7691
7692ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7693 ParseStatus Res =
7694 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7695 if (Res.isNoMatch()) {
7696 Res =
7697 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7698 }
7699 return Res;
7700}
7701
7702//===----------------------------------------------------------------------===//
7703// Exp
7704//===----------------------------------------------------------------------===//
7705
7706void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7707 OptionalImmIndexMap OptionalIdx;
7708
7709 unsigned OperandIdx[4];
7710 unsigned EnMask = 0;
7711 int SrcIdx = 0;
7712
7713 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7714 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7715
7716 // Add the register arguments
7717 if (Op.isReg()) {
7718 assert(SrcIdx < 4);
7719 OperandIdx[SrcIdx] = Inst.size();
7720 Op.addRegOperands(Inst, 1);
7721 ++SrcIdx;
7722 continue;
7723 }
7724
7725 if (Op.isOff()) {
7726 assert(SrcIdx < 4);
7727 OperandIdx[SrcIdx] = Inst.size();
7728 Inst.addOperand(MCOperand::createReg(MCRegister()));
7729 ++SrcIdx;
7730 continue;
7731 }
7732
7733 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7734 Op.addImmOperands(Inst, 1);
7735 continue;
7736 }
7737
7738 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7739 continue;
7740
7741 // Handle optional arguments
7742 OptionalIdx[Op.getImmTy()] = i;
7743 }
7744
7745 assert(SrcIdx == 4);
7746
7747 bool Compr = false;
7748 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7749 Compr = true;
7750 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7751 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7752 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7753 }
7754
7755 for (auto i = 0; i < SrcIdx; ++i) {
7756 if (Inst.getOperand(OperandIdx[i]).getReg()) {
7757 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7758 }
7759 }
7760
7761 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7762 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7763
7764 Inst.addOperand(MCOperand::createImm(EnMask));
7765}
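// Illustrative export syntax handled by the conversion above: the source
// registers (or "off") determine the en mask, and "compr" packs two halves
// per register. Examples (not from this file):
//   exp mrt0 v0, v1, v2, v3 done vm
//   exp mrt0 v0, v1, off, off compr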
7766
7767//===----------------------------------------------------------------------===//
7768// s_waitcnt
7769//===----------------------------------------------------------------------===//
7770
7771static bool
7772 encodeCnt(
7773 const AMDGPU::IsaVersion ISA,
7774 int64_t &IntVal,
7775 int64_t CntVal,
7776 bool Saturate,
7777 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7778 unsigned (*decode)(const IsaVersion &Version, unsigned))
7779{
7780 bool Failed = false;
7781
7782 IntVal = encode(ISA, IntVal, CntVal);
7783 if (CntVal != decode(ISA, IntVal)) {
7784 if (Saturate) {
7785 IntVal = encode(ISA, IntVal, -1);
7786 } else {
7787 Failed = true;
7788 }
7789 }
7790 return Failed;
7791}
7792
7793bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7794
7795 SMLoc CntLoc = getLoc();
7796 StringRef CntName = getTokenStr();
7797
7798 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7799 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7800 return false;
7801
7802 int64_t CntVal;
7803 SMLoc ValLoc = getLoc();
7804 if (!parseExpr(CntVal))
7805 return false;
7806
7807 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7808
7809 bool Failed = true;
7810 bool Sat = CntName.ends_with("_sat");
7811
7812 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7813 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7814 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7815 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7816 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7817 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7818 } else {
7819 Error(CntLoc, "invalid counter name " + CntName);
7820 return false;
7821 }
7822
7823 if (Failed) {
7824 Error(ValLoc, "too large value for " + CntName);
7825 return false;
7826 }
7827
7828 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7829 return false;
7830
7831 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7832 if (isToken(AsmToken::EndOfStatement)) {
7833 Error(getLoc(), "expected a counter name");
7834 return false;
7835 }
7836 }
7837
7838 return true;
7839}
7840
7841ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7842 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7843 int64_t Waitcnt = getWaitcntBitMask(ISA);
7844 SMLoc S = getLoc();
7845
7846 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7847 while (!isToken(AsmToken::EndOfStatement)) {
7848 if (!parseCnt(Waitcnt))
7849 return ParseStatus::Failure;
7850 }
7851 } else {
7852 if (!parseExpr(Waitcnt))
7853 return ParseStatus::Failure;
7854 }
7855
7856 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7857 return ParseStatus::Success;
7858}
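// Both symbolic and raw forms are accepted; illustrative examples:
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
//   s_waitcnt 0
// The "*_sat" spellings clamp an out-of-range count instead of reporting an
// error.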
7859
7860bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7861 SMLoc FieldLoc = getLoc();
7862 StringRef FieldName = getTokenStr();
7863 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7864 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7865 return false;
7866
7867 SMLoc ValueLoc = getLoc();
7868 StringRef ValueName = getTokenStr();
7869 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7870 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7871 return false;
7872
7873 unsigned Shift;
7874 if (FieldName == "instid0") {
7875 Shift = 0;
7876 } else if (FieldName == "instskip") {
7877 Shift = 4;
7878 } else if (FieldName == "instid1") {
7879 Shift = 7;
7880 } else {
7881 Error(FieldLoc, "invalid field name " + FieldName);
7882 return false;
7883 }
7884
7885 int Value;
7886 if (Shift == 4) {
7887 // Parse values for instskip.
7888 Value = StringSwitch<int>(ValueName)
7889 .Case("SAME", 0)
7890 .Case("NEXT", 1)
7891 .Case("SKIP_1", 2)
7892 .Case("SKIP_2", 3)
7893 .Case("SKIP_3", 4)
7894 .Case("SKIP_4", 5)
7895 .Default(-1);
7896 } else {
7897 // Parse values for instid0 and instid1.
7898 Value = StringSwitch<int>(ValueName)
7899 .Case("NO_DEP", 0)
7900 .Case("VALU_DEP_1", 1)
7901 .Case("VALU_DEP_2", 2)
7902 .Case("VALU_DEP_3", 3)
7903 .Case("VALU_DEP_4", 4)
7904 .Case("TRANS32_DEP_1", 5)
7905 .Case("TRANS32_DEP_2", 6)
7906 .Case("TRANS32_DEP_3", 7)
7907 .Case("FMA_ACCUM_CYCLE_1", 8)
7908 .Case("SALU_CYCLE_1", 9)
7909 .Case("SALU_CYCLE_2", 10)
7910 .Case("SALU_CYCLE_3", 11)
7911 .Default(-1);
7912 }
7913 if (Value < 0) {
7914 Error(ValueLoc, "invalid value name " + ValueName);
7915 return false;
7916 }
7917
7918 Delay |= Value << Shift;
7919 return true;
7920}
7921
7922ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7923 int64_t Delay = 0;
7924 SMLoc S = getLoc();
7925
7926 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7927 do {
7928 if (!parseDelay(Delay))
7929 return ParseStatus::Failure;
7930 } while (trySkipToken(AsmToken::Pipe));
7931 } else {
7932 if (!parseExpr(Delay))
7933 return ParseStatus::Failure;
7934 }
7935
7936 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7937 return ParseStatus::Success;
7938}
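// Illustrative GFX11+ syntax for the delay operand parsed above:
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
//   s_delay_alu 0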
7939
7940bool
7941AMDGPUOperand::isSWaitCnt() const {
7942 return isImm();
7943}
7944
7945bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7946
7947//===----------------------------------------------------------------------===//
7948// DepCtr
7949//===----------------------------------------------------------------------===//
7950
7951void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7952 StringRef DepCtrName) {
7953 switch (ErrorId) {
7954 case OPR_ID_UNKNOWN:
7955 Error(Loc, Twine("invalid counter name ", DepCtrName));
7956 return;
7957 case OPR_ID_UNSUPPORTED:
7958 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7959 return;
7960 case OPR_ID_DUPLICATE:
7961 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7962 return;
7963 case OPR_VAL_INVALID:
7964 Error(Loc, Twine("invalid value for ", DepCtrName));
7965 return;
7966 default:
7967 assert(false);
7968 }
7969}
7970
7971bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7972
7973 using namespace llvm::AMDGPU::DepCtr;
7974
7975 SMLoc DepCtrLoc = getLoc();
7976 StringRef DepCtrName = getTokenStr();
7977
7978 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7979 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7980 return false;
7981
7982 int64_t ExprVal;
7983 if (!parseExpr(ExprVal))
7984 return false;
7985
7986 unsigned PrevOprMask = UsedOprMask;
7987 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7988
7989 if (CntVal < 0) {
7990 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7991 return false;
7992 }
7993
7994 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7995 return false;
7996
7997 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7998 if (isToken(AsmToken::EndOfStatement)) {
7999 Error(getLoc(), "expected a counter name");
8000 return false;
8001 }
8002 }
8003
8004 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8005 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8006 return true;
8007}
8008
8009ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8010 using namespace llvm::AMDGPU::DepCtr;
8011
8012 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
8013 SMLoc Loc = getLoc();
8014
8015 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8016 unsigned UsedOprMask = 0;
8017 while (!isToken(AsmToken::EndOfStatement)) {
8018 if (!parseDepCtr(DepCtr, UsedOprMask))
8019 return ParseStatus::Failure;
8020 }
8021 } else {
8022 if (!parseExpr(DepCtr))
8023 return ParseStatus::Failure;
8024 }
8025
8026 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8027 return ParseStatus::Success;
8028}
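// Illustrative syntax (the counter names below are examples; the accepted set
// comes from AMDGPU::DepCtr and depends on the subtarget):
//   s_waitcnt_depctr depctr_va_vdst(0) depctr_vm_vsrc(1)
//   s_waitcnt_depctr 0xffe3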
8029
8030bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8031
8032//===----------------------------------------------------------------------===//
8033// hwreg
8034//===----------------------------------------------------------------------===//
8035
8036ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8037 OperandInfoTy &Offset,
8038 OperandInfoTy &Width) {
8039 using namespace llvm::AMDGPU::Hwreg;
8040
8041 if (!trySkipId("hwreg", AsmToken::LParen))
8042 return ParseStatus::NoMatch;
8043
8044 // The register may be specified by name or using a numeric code
8045 HwReg.Loc = getLoc();
8046 if (isToken(AsmToken::Identifier) &&
8047 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8048 HwReg.IsSymbolic = true;
8049 lex(); // skip register name
8050 } else if (!parseExpr(HwReg.Val, "a register name")) {
8051 return ParseStatus::Failure;
8052 }
8053
8054 if (trySkipToken(AsmToken::RParen))
8055 return ParseStatus::Success;
8056
8057 // parse optional params
8058 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8059 return ParseStatus::Failure;
8060
8061 Offset.Loc = getLoc();
8062 if (!parseExpr(Offset.Val))
8063 return ParseStatus::Failure;
8064
8065 if (!skipToken(AsmToken::Comma, "expected a comma"))
8066 return ParseStatus::Failure;
8067
8068 Width.Loc = getLoc();
8069 if (!parseExpr(Width.Val) ||
8070 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8071 return ParseStatus::Failure;
8072
8073 return ParseStatus::Success;
8074}
8075
8076ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8077 using namespace llvm::AMDGPU::Hwreg;
8078
8079 int64_t ImmVal = 0;
8080 SMLoc Loc = getLoc();
8081
8082 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8083 HwregId::Default);
8084 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8085 HwregOffset::Default);
8086 struct : StructuredOpField {
8087 using StructuredOpField::StructuredOpField;
8088 bool validate(AMDGPUAsmParser &Parser) const override {
8089 if (!isUIntN(Width, Val - 1))
8090 return Error(Parser, "only values from 1 to 32 are legal");
8091 return true;
8092 }
8093 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8094 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8095
8096 if (Res.isNoMatch())
8097 Res = parseHwregFunc(HwReg, Offset, Width);
8098
8099 if (Res.isSuccess()) {
8100 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8101 return ParseStatus::Failure;
8102 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8103 }
8104
8105 if (Res.isNoMatch() &&
8106 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8107 Res = ParseStatus::Success;
8108
8109 if (!Res.isSuccess())
8110 return ParseStatus::Failure;
8111
8112 if (!isUInt<16>(ImmVal))
8113 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8114 Operands.push_back(
8115 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8116 return ParseStatus::Success;
8117}
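// Illustrative forms accepted above (register and field names are examples):
//   s_getreg_b32 s0, hwreg(HW_REG_MODE)
//   s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
//   s_getreg_b32 s0, {id: 1, offset: 0, size: 32}   // structured fields
//   s_getreg_b32 s0, 0x1                            // raw 16-bit immediate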
8118
8119bool AMDGPUOperand::isHwreg() const {
8120 return isImmTy(ImmTyHwreg);
8121}
8122
8123//===----------------------------------------------------------------------===//
8124// sendmsg
8125//===----------------------------------------------------------------------===//
8126
8127bool
8128AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8129 OperandInfoTy &Op,
8130 OperandInfoTy &Stream) {
8131 using namespace llvm::AMDGPU::SendMsg;
8132
8133 Msg.Loc = getLoc();
8134 if (isToken(AsmToken::Identifier) &&
8135 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8136 Msg.IsSymbolic = true;
8137 lex(); // skip message name
8138 } else if (!parseExpr(Msg.Val, "a message name")) {
8139 return false;
8140 }
8141
8142 if (trySkipToken(AsmToken::Comma)) {
8143 Op.IsDefined = true;
8144 Op.Loc = getLoc();
8145 if (isToken(AsmToken::Identifier) &&
8146 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8147 OPR_ID_UNKNOWN) {
8148 lex(); // skip operation name
8149 } else if (!parseExpr(Op.Val, "an operation name")) {
8150 return false;
8151 }
8152
8153 if (trySkipToken(AsmToken::Comma)) {
8154 Stream.IsDefined = true;
8155 Stream.Loc = getLoc();
8156 if (!parseExpr(Stream.Val))
8157 return false;
8158 }
8159 }
8160
8161 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8162}
8163
8164bool
8165AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8166 const OperandInfoTy &Op,
8167 const OperandInfoTy &Stream) {
8168 using namespace llvm::AMDGPU::SendMsg;
8169
8170 // Validation strictness depends on whether the message is specified
8171 // in symbolic or numeric form. In the latter case only the
8172 // possibility of encoding the value is checked.
8173 bool Strict = Msg.IsSymbolic;
8174
8175 if (Strict) {
8176 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8177 Error(Msg.Loc, "specified message id is not supported on this GPU");
8178 return false;
8179 }
8180 } else {
8181 if (!isValidMsgId(Msg.Val, getSTI())) {
8182 Error(Msg.Loc, "invalid message id");
8183 return false;
8184 }
8185 }
8186 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8187 if (Op.IsDefined) {
8188 Error(Op.Loc, "message does not support operations");
8189 } else {
8190 Error(Msg.Loc, "missing message operation");
8191 }
8192 return false;
8193 }
8194 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8195 if (Op.Val == OPR_ID_UNSUPPORTED)
8196 Error(Op.Loc, "specified operation id is not supported on this GPU");
8197 else
8198 Error(Op.Loc, "invalid operation id");
8199 return false;
8200 }
8201 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8202 Stream.IsDefined) {
8203 Error(Stream.Loc, "message operation does not support streams");
8204 return false;
8205 }
8206 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8207 Error(Stream.Loc, "invalid message stream id");
8208 return false;
8209 }
8210 return true;
8211}
8212
8213ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8214 using namespace llvm::AMDGPU::SendMsg;
8215
8216 int64_t ImmVal = 0;
8217 SMLoc Loc = getLoc();
8218
8219 if (trySkipId("sendmsg", AsmToken::LParen)) {
8220 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8221 OperandInfoTy Op(OP_NONE_);
8222 OperandInfoTy Stream(STREAM_ID_NONE_);
8223 if (parseSendMsgBody(Msg, Op, Stream) &&
8224 validateSendMsg(Msg, Op, Stream)) {
8225 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8226 } else {
8227 return ParseStatus::Failure;
8228 }
8229 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8230 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8231 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8232 } else {
8233 return ParseStatus::Failure;
8234 }
8235
8236 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8237 return ParseStatus::Success;
8238}
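// Illustrative forms (message and operation names are examples from SendMsg):
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg 0x1          // raw 16-bit immediate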
8239
8240bool AMDGPUOperand::isSendMsg() const {
8241 return isImmTy(ImmTySendMsg);
8242}
8243
8244//===----------------------------------------------------------------------===//
8245// v_interp
8246//===----------------------------------------------------------------------===//
8247
8248ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8249 StringRef Str;
8250 SMLoc S = getLoc();
8251
8252 if (!parseId(Str))
8253 return ParseStatus::NoMatch;
8254
8255 int Slot = StringSwitch<int>(Str)
8256 .Case("p10", 0)
8257 .Case("p20", 1)
8258 .Case("p0", 2)
8259 .Default(-1);
8260
8261 if (Slot == -1)
8262 return Error(S, "invalid interpolation slot");
8263
8264 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8265 AMDGPUOperand::ImmTyInterpSlot));
8266 return ParseStatus::Success;
8267}
8268
8269ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8270 StringRef Str;
8271 SMLoc S = getLoc();
8272
8273 if (!parseId(Str))
8274 return ParseStatus::NoMatch;
8275
8276 if (!Str.starts_with("attr"))
8277 return Error(S, "invalid interpolation attribute");
8278
8279 StringRef Chan = Str.take_back(2);
8280 int AttrChan = StringSwitch<int>(Chan)
8281 .Case(".x", 0)
8282 .Case(".y", 1)
8283 .Case(".z", 2)
8284 .Case(".w", 3)
8285 .Default(-1);
8286 if (AttrChan == -1)
8287 return Error(S, "invalid or missing interpolation attribute channel");
8288
8289 Str = Str.drop_back(2).drop_front(4);
8290
8291 uint8_t Attr;
8292 if (Str.getAsInteger(10, Attr))
8293 return Error(S, "invalid or missing interpolation attribute number");
8294
8295 if (Attr > 32)
8296 return Error(S, "out of bounds interpolation attribute number");
8297
8298 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8299
8300 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8301 AMDGPUOperand::ImmTyInterpAttr));
8302 Operands.push_back(AMDGPUOperand::CreateImm(
8303 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8304 return ParseStatus::Success;
8305}
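// Illustrative interpolation operand syntax for the two parsers above:
//   v_interp_p1_f32 v0, v1, attr2.y     // attribute 2, channel y
//   v_interp_mov_f32 v0, p10, attr0.x   // slot p10/p20/p0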
8306
8307//===----------------------------------------------------------------------===//
8308// exp
8309//===----------------------------------------------------------------------===//
8310
8311ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8312 using namespace llvm::AMDGPU::Exp;
8313
8314 StringRef Str;
8315 SMLoc S = getLoc();
8316
8317 if (!parseId(Str))
8318 return ParseStatus::NoMatch;
8319
8320 unsigned Id = getTgtId(Str);
8321 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8322 return Error(S, (Id == ET_INVALID)
8323 ? "invalid exp target"
8324 : "exp target is not supported on this GPU");
8325
8326 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8327 AMDGPUOperand::ImmTyExpTgt));
8328 return ParseStatus::Success;
8329}
8330
8331//===----------------------------------------------------------------------===//
8332// parser helpers
8333//===----------------------------------------------------------------------===//
8334
8335bool
8336AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8337 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8338}
8339
8340bool
8341AMDGPUAsmParser::isId(const StringRef Id) const {
8342 return isId(getToken(), Id);
8343}
8344
8345bool
8346AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8347 return getTokenKind() == Kind;
8348}
8349
8350StringRef AMDGPUAsmParser::getId() const {
8351 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8352}
8353
8354bool
8355AMDGPUAsmParser::trySkipId(const StringRef Id) {
8356 if (isId(Id)) {
8357 lex();
8358 return true;
8359 }
8360 return false;
8361}
8362
8363bool
8364AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8365 if (isToken(AsmToken::Identifier)) {
8366 StringRef Tok = getTokenStr();
8367 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8368 lex();
8369 return true;
8370 }
8371 }
8372 return false;
8373}
8374
8375bool
8376AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8377 if (isId(Id) && peekToken().is(Kind)) {
8378 lex();
8379 lex();
8380 return true;
8381 }
8382 return false;
8383}
8384
8385bool
8386AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8387 if (isToken(Kind)) {
8388 lex();
8389 return true;
8390 }
8391 return false;
8392}
8393
8394bool
8395AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8396 const StringRef ErrMsg) {
8397 if (!trySkipToken(Kind)) {
8398 Error(getLoc(), ErrMsg);
8399 return false;
8400 }
8401 return true;
8402}
8403
8404bool
8405AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8406 SMLoc S = getLoc();
8407
8408 const MCExpr *Expr;
8409 if (Parser.parseExpression(Expr))
8410 return false;
8411
8412 if (Expr->evaluateAsAbsolute(Imm))
8413 return true;
8414
8415 if (Expected.empty()) {
8416 Error(S, "expected absolute expression");
8417 } else {
8418 Error(S, Twine("expected ", Expected) +
8419 Twine(" or an absolute expression"));
8420 }
8421 return false;
8422}
8423
8424bool
8425AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8426 SMLoc S = getLoc();
8427
8428 const MCExpr *Expr;
8429 if (Parser.parseExpression(Expr))
8430 return false;
8431
8432 int64_t IntVal;
8433 if (Expr->evaluateAsAbsolute(IntVal)) {
8434 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8435 } else {
8436 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8437 }
8438 return true;
8439}
8440
8441bool
8442AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8443 if (isToken(AsmToken::String)) {
8444 Val = getToken().getStringContents();
8445 lex();
8446 return true;
8447 }
8448 Error(getLoc(), ErrMsg);
8449 return false;
8450}
8451
8452bool
8453AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8454 if (isToken(AsmToken::Identifier)) {
8455 Val = getTokenStr();
8456 lex();
8457 return true;
8458 }
8459 if (!ErrMsg.empty())
8460 Error(getLoc(), ErrMsg);
8461 return false;
8462}
8463
8464AsmToken
8465AMDGPUAsmParser::getToken() const {
8466 return Parser.getTok();
8467}
8468
8469AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8470 return isToken(AsmToken::EndOfStatement)
8471 ? getToken()
8472 : getLexer().peekTok(ShouldSkipSpace);
8473}
8474
8475void
8476AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8477 auto TokCount = getLexer().peekTokens(Tokens);
8478
8479 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8480 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8481}
8482
8483AsmToken::TokenKind
8484AMDGPUAsmParser::getTokenKind() const {
8485 return getLexer().getKind();
8486}
8487
8488SMLoc
8489AMDGPUAsmParser::getLoc() const {
8490 return getToken().getLoc();
8491}
8492
8493StringRef
8494AMDGPUAsmParser::getTokenStr() const {
8495 return getToken().getString();
8496}
8497
8498void
8499AMDGPUAsmParser::lex() {
8500 Parser.Lex();
8501}
8502
8503SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8504 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8505}
8506
8507// Returns whichever of the given locations comes later in the source.
8508SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8509 return a.getPointer() < b.getPointer() ? b : a;
8510}
8511
8512SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8513 int MCOpIdx) const {
8514 for (const auto &Op : Operands) {
8515 const auto TargetOp = static_cast<AMDGPUOperand &>(*Op);
8516 if (TargetOp.getMCOpIdx() == MCOpIdx)
8517 return TargetOp.getStartLoc();
8518 }
8519 llvm_unreachable("No such MC operand!");
8520}
8521
8522SMLoc
8523AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8524 const OperandVector &Operands) const {
8525 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8526 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8527 if (Test(Op))
8528 return Op.getStartLoc();
8529 }
8530 return getInstLoc(Operands);
8531}
8532
8533SMLoc
8534AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8535 const OperandVector &Operands) const {
8536 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8537 return getOperandLoc(Test, Operands);
8538}
8539
8540ParseStatus
8541AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8542 if (!trySkipToken(AsmToken::LCurly))
8543 return ParseStatus::NoMatch;
8544
8545 bool First = true;
8546 while (!trySkipToken(AsmToken::RCurly)) {
8547 if (!First &&
8548 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8549 return ParseStatus::Failure;
8550
8551 StringRef Id = getTokenStr();
8552 SMLoc IdLoc = getLoc();
8553 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8554 !skipToken(AsmToken::Colon, "colon expected"))
8555 return ParseStatus::Failure;
8556
8557 const auto *I =
8558 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8559 if (I == Fields.end())
8560 return Error(IdLoc, "unknown field");
8561 if ((*I)->IsDefined)
8562 return Error(IdLoc, "duplicate field");
8563
8564 // TODO: Support symbolic values.
8565 (*I)->Loc = getLoc();
8566 if (!parseExpr((*I)->Val))
8567 return ParseStatus::Failure;
8568 (*I)->IsDefined = true;
8569
8570 First = false;
8571 }
8572 return ParseStatus::Success;
8573}
8574
8575bool AMDGPUAsmParser::validateStructuredOpFields(
8576 ArrayRef<const StructuredOpField *> Fields) {
8577 return all_of(Fields, [this](const StructuredOpField *F) {
8578 return F->validate(*this);
8579 });
8580}
8581
8582//===----------------------------------------------------------------------===//
8583// swizzle
8584//===----------------------------------------------------------------------===//
8585
8587static unsigned
8588encodeBitmaskPerm(const unsigned AndMask,
8589 const unsigned OrMask,
8590 const unsigned XorMask) {
8591 using namespace llvm::AMDGPU::Swizzle;
8592
8593 return BITMASK_PERM_ENC |
8594 (AndMask << BITMASK_AND_SHIFT) |
8595 (OrMask << BITMASK_OR_SHIFT) |
8596 (XorMask << BITMASK_XOR_SHIFT);
8597}
8598
8599bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8600 const unsigned MaxVal,
8601 const Twine &ErrMsg, SMLoc &Loc) {
8602 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8603 return false;
8604 }
8605 Loc = getLoc();
8606 if (!parseExpr(Op)) {
8607 return false;
8608 }
8609 if (Op < MinVal || Op > MaxVal) {
8610 Error(Loc, ErrMsg);
8611 return false;
8612 }
8613
8614 return true;
8615}
8616
8617bool
8618AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8619 const unsigned MinVal,
8620 const unsigned MaxVal,
8621 const StringRef ErrMsg) {
8622 SMLoc Loc;
8623 for (unsigned i = 0; i < OpNum; ++i) {
8624 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8625 return false;
8626 }
8627
8628 return true;
8629}
8630
8631bool
8632AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8633 using namespace llvm::AMDGPU::Swizzle;
8634
8635 int64_t Lane[LANE_NUM];
8636 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8637 "expected a 2-bit lane id")) {
8638 Imm = QUAD_PERM_ENC;
8639 for (unsigned I = 0; I < LANE_NUM; ++I) {
8640 Imm |= Lane[I] << (LANE_SHIFT * I);
8641 }
8642 return true;
8643 }
8644 return false;
8645}
8646
8647bool
8648AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8649 using namespace llvm::AMDGPU::Swizzle;
8650
8651 SMLoc Loc;
8652 int64_t GroupSize;
8653 int64_t LaneIdx;
8654
8655 if (!parseSwizzleOperand(GroupSize,
8656 2, 32,
8657 "group size must be in the interval [2,32]",
8658 Loc)) {
8659 return false;
8660 }
8661 if (!isPowerOf2_64(GroupSize)) {
8662 Error(Loc, "group size must be a power of two");
8663 return false;
8664 }
8665 if (parseSwizzleOperand(LaneIdx,
8666 0, GroupSize - 1,
8667 "lane id must be in the interval [0,group size - 1]",
8668 Loc)) {
8669 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8670 return true;
8671 }
8672 return false;
8673}
8674
8675bool
8676AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8677 using namespace llvm::AMDGPU::Swizzle;
8678
8679 SMLoc Loc;
8680 int64_t GroupSize;
8681
8682 if (!parseSwizzleOperand(GroupSize,
8683 2, 32,
8684 "group size must be in the interval [2,32]",
8685 Loc)) {
8686 return false;
8687 }
8688 if (!isPowerOf2_64(GroupSize)) {
8689 Error(Loc, "group size must be a power of two");
8690 return false;
8691 }
8692
8693 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8694 return true;
8695}
8696
8697bool
8698AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8699 using namespace llvm::AMDGPU::Swizzle;
8700
8701 SMLoc Loc;
8702 int64_t GroupSize;
8703
8704 if (!parseSwizzleOperand(GroupSize,
8705 1, 16,
8706 "group size must be in the interval [1,16]",
8707 Loc)) {
8708 return false;
8709 }
8710 if (!isPowerOf2_64(GroupSize)) {
8711 Error(Loc, "group size must be a power of two");
8712 return false;
8713 }
8714
8715 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8716 return true;
8717}
8718
8719bool
8720AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8721 using namespace llvm::AMDGPU::Swizzle;
8722
8723 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8724 return false;
8725 }
8726
8727 StringRef Ctl;
8728 SMLoc StrLoc = getLoc();
8729 if (!parseString(Ctl)) {
8730 return false;
8731 }
8732 if (Ctl.size() != BITMASK_WIDTH) {
8733 Error(StrLoc, "expected a 5-character mask");
8734 return false;
8735 }
8736
8737 unsigned AndMask = 0;
8738 unsigned OrMask = 0;
8739 unsigned XorMask = 0;
8740
8741 for (size_t i = 0; i < Ctl.size(); ++i) {
8742 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8743 switch(Ctl[i]) {
8744 default:
8745 Error(StrLoc, "invalid mask");
8746 return false;
8747 case '0':
8748 break;
8749 case '1':
8750 OrMask |= Mask;
8751 break;
8752 case 'p':
8753 AndMask |= Mask;
8754 break;
8755 case 'i':
8756 AndMask |= Mask;
8757 XorMask |= Mask;
8758 break;
8759 }
8760 }
8761
8762 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8763 return true;
8764}
8765
8766bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8767 using namespace llvm::AMDGPU::Swizzle;
8768
8769 if (!AMDGPU::isGFX9Plus(getSTI())) {
8770 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8771 return false;
8772 }
8773
8774 int64_t Swizzle;
8775 SMLoc Loc;
8776 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8777 "FFT swizzle must be in the interval [0," +
8778 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8779 Loc))
8780 return false;
8781
8782 Imm = FFT_MODE_ENC | Swizzle;
8783 return true;
8784}
8785
8786bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8787 using namespace llvm::AMDGPU::Swizzle;
8788
8789 if (!AMDGPU::isGFX9Plus(getSTI())) {
8790 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8791 return false;
8792 }
8793
8794 SMLoc Loc;
8795 int64_t Direction;
8796
8797 if (!parseSwizzleOperand(Direction, 0, 1,
8798 "direction must be 0 (left) or 1 (right)", Loc))
8799 return false;
8800
8801 int64_t RotateSize;
8802 if (!parseSwizzleOperand(
8803 RotateSize, 0, ROTATE_MAX_SIZE,
8804 "number of threads to rotate must be in the interval [0," +
8805 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8806 Loc))
8807 return false;
8808
8809 Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
8810 (RotateSize << ROTATE_SIZE_SHIFT);
8811 return true;
8812}
8813
8814bool
8815AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8816
8817 SMLoc OffsetLoc = getLoc();
8818
8819 if (!parseExpr(Imm, "a swizzle macro")) {
8820 return false;
8821 }
8822 if (!isUInt<16>(Imm)) {
8823 Error(OffsetLoc, "expected a 16-bit offset");
8824 return false;
8825 }
8826 return true;
8827}
8828
8829bool
8830AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8831 using namespace llvm::AMDGPU::Swizzle;
8832
8833 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
8834
8835 SMLoc ModeLoc = getLoc();
8836 bool Ok = false;
8837
8838 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8839 Ok = parseSwizzleQuadPerm(Imm);
8840 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8841 Ok = parseSwizzleBitmaskPerm(Imm);
8842 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8843 Ok = parseSwizzleBroadcast(Imm);
8844 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8845 Ok = parseSwizzleSwap(Imm);
8846 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8847 Ok = parseSwizzleReverse(Imm);
8848 } else if (trySkipId(IdSymbolic[ID_FFT])) {
8849 Ok = parseSwizzleFFT(Imm);
8850 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8851 Ok = parseSwizzleRotate(Imm);
8852 } else {
8853 Error(ModeLoc, "expected a swizzle mode");
8854 }
8855
8856 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
8857 }
8858
8859 return false;
8860}
8861
8862ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8863 SMLoc S = getLoc();
8864 int64_t Imm = 0;
8865
8866 if (trySkipId("offset")) {
8867
8868 bool Ok = false;
8869 if (skipToken(AsmToken::Colon, "expected a colon")) {
8870 if (trySkipId("swizzle")) {
8871 Ok = parseSwizzleMacro(Imm);
8872 } else {
8873 Ok = parseSwizzleOffset(Imm);
8874 }
8875 }
8876
8877 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8878
8879 return Ok ? ParseStatus::Success : ParseStatus::Failure;
8880 }
8881 return ParseStatus::NoMatch;
8882}
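// Illustrative ds_swizzle_b32 offsets built from the macros above:
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v5, v1 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v5, v1 offset:swizzle(BROADCAST, 8, 3)
//   ds_swizzle_b32 v5, v1 offset:0x8000    // raw 16-bit offset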
8883
8884bool
8885AMDGPUOperand::isSwizzle() const {
8886 return isImmTy(ImmTySwizzle);
8887}
8888
8889//===----------------------------------------------------------------------===//
8890// VGPR Index Mode
8891//===----------------------------------------------------------------------===//
8892
8893int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8894
8895 using namespace llvm::AMDGPU::VGPRIndexMode;
8896
8897 if (trySkipToken(AsmToken::RParen)) {
8898 return OFF;
8899 }
8900
8901 int64_t Imm = 0;
8902
8903 while (true) {
8904 unsigned Mode = 0;
8905 SMLoc S = getLoc();
8906
8907 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8908 if (trySkipId(IdSymbolic[ModeId])) {
8909 Mode = 1 << ModeId;
8910 break;
8911 }
8912 }
8913
8914 if (Mode == 0) {
8915 Error(S, (Imm == 0)?
8916 "expected a VGPR index mode or a closing parenthesis" :
8917 "expected a VGPR index mode");
8918 return UNDEF;
8919 }
8920
8921 if (Imm & Mode) {
8922 Error(S, "duplicate VGPR index mode");
8923 return UNDEF;
8924 }
8925 Imm |= Mode;
8926
8927 if (trySkipToken(AsmToken::RParen))
8928 break;
8929 if (!skipToken(AsmToken::Comma,
8930 "expected a comma or a closing parenthesis"))
8931 return UNDEF;
8932 }
8933
8934 return Imm;
8935}
8936
8937ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8938
8939 using namespace llvm::AMDGPU::VGPRIndexMode;
8940
8941 int64_t Imm = 0;
8942 SMLoc S = getLoc();
8943
8944 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8945 Imm = parseGPRIdxMacro();
8946 if (Imm == UNDEF)
8947 return ParseStatus::Failure;
8948 } else {
8949 if (getParser().parseAbsoluteExpression(Imm))
8950 return ParseStatus::Failure;
8951 if (Imm < 0 || !isUInt<4>(Imm))
8952 return Error(S, "invalid immediate: only 4-bit values are legal");
8953 }
8954
8955 Operands.push_back(
8956 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8957 return ParseStatus::Success;
8958}
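// Illustrative syntax for the VGPR index mode operand:
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
//   s_set_gpr_idx_on s0, 0x3             // raw 4-bit mode mask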
8959
8960bool AMDGPUOperand::isGPRIdxMode() const {
8961 return isImmTy(ImmTyGprIdxMode);
8962}
8963
8964//===----------------------------------------------------------------------===//
8965// sopp branch targets
8966//===----------------------------------------------------------------------===//
8967
8968ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8969
8970 // Make sure we are not parsing something
8971 // that looks like a label or an expression but is not.
8972 // This will improve error messages.
8973 if (isRegister() || isModifier())
8974 return ParseStatus::NoMatch;
8975
8976 if (!parseExpr(Operands))
8977 return ParseStatus::Failure;
8978
8979 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8980 assert(Opr.isImm() || Opr.isExpr());
8981 SMLoc Loc = Opr.getStartLoc();
8982
8983 // Currently we do not support arbitrary expressions as branch targets.
8984 // Only labels and absolute expressions are accepted.
8985 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8986 Error(Loc, "expected an absolute expression or a label");
8987 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8988 Error(Loc, "expected a 16-bit signed jump offset");
8989 }
8990
8991 return ParseStatus::Success;
8992}
8993
8994//===----------------------------------------------------------------------===//
8995// Boolean holding registers
8996//===----------------------------------------------------------------------===//
8997
8998ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8999 return parseReg(Operands);
9000}
9001
9002//===----------------------------------------------------------------------===//
9003// mubuf
9004//===----------------------------------------------------------------------===//
9005
9006void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9007 const OperandVector &Operands,
9008 bool IsAtomic) {
9009 OptionalImmIndexMap OptionalIdx;
9010 unsigned FirstOperandIdx = 1;
9011 bool IsAtomicReturn = false;
9012
9013 if (IsAtomic) {
9014 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
9015 SIInstrFlags::IsAtomicRet;
9016 }
9017
9018 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9019 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9020
9021 // Add the register arguments
9022 if (Op.isReg()) {
9023 Op.addRegOperands(Inst, 1);
9024 // Insert a tied src for atomic return dst.
9025 // This cannot be postponed as subsequent calls to
9026 // addImmOperands rely on correct number of MC operands.
9027 if (IsAtomicReturn && i == FirstOperandIdx)
9028 Op.addRegOperands(Inst, 1);
9029 continue;
9030 }
9031
9032 // Handle the case where soffset is an immediate
9033 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9034 Op.addImmOperands(Inst, 1);
9035 continue;
9036 }
9037
9038 // Handle tokens like 'offen' which are sometimes hard-coded into the
9039 // asm string. There are no MCInst operands for these.
9040 if (Op.isToken()) {
9041 continue;
9042 }
9043 assert(Op.isImm());
9044
9045 // Handle optional arguments
9046 OptionalIdx[Op.getImmTy()] = i;
9047 }
9048
9049 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9050 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9051}
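// A typical MUBUF form handled by cvtMubufImpl (illustrative; pre-gfx10 cache
// policy spelling assumed):
//   buffer_load_dword v0, v1, s[4:7], s0 offen offset:16 glc
// Here 'offen' is a token with no MCInst operand, while 'offset' and the cache
// policy are collected through OptionalIdx.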
9052
9053//===----------------------------------------------------------------------===//
9054// smrd
9055//===----------------------------------------------------------------------===//
9056
9057bool AMDGPUOperand::isSMRDOffset8() const {
9058 return isImmLiteral() && isUInt<8>(getImm());
9059}
9060
9061bool AMDGPUOperand::isSMEMOffset() const {
9062 // Offset range is checked later by validator.
9063 return isImmLiteral();
9064}
9065
9066bool AMDGPUOperand::isSMRDLiteralOffset() const {
9067 // 32-bit literals are only supported on CI and we only want to use them
9068 // when the offset is > 8-bits.
9069 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9070}
9071
9072//===----------------------------------------------------------------------===//
9073// vop3
9074//===----------------------------------------------------------------------===//
9075
9076static bool ConvertOmodMul(int64_t &Mul) {
9077 if (Mul != 1 && Mul != 2 && Mul != 4)
9078 return false;
9079
9080 Mul >>= 1;
9081 return true;
9082}
9083
9084static bool ConvertOmodDiv(int64_t &Div) {
9085 if (Div == 1) {
9086 Div = 0;
9087 return true;
9088 }
9089
9090 if (Div == 2) {
9091 Div = 3;
9092 return true;
9093 }
9094
9095 return false;
9096}
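// Illustrative omod spellings and the encodings the converters above produce:
//   mul:2 -> 1, mul:4 -> 2 (Mul >> 1); div:2 -> 3
//   e.g. v_add_f32 v0, v1, v2 mul:2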
9097
9098// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9099// This is intentional and ensures compatibility with sp3.
9100// See bug 35397 for details.
9101bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9102 if (BoundCtrl == 0 || BoundCtrl == 1) {
9103 if (!isGFX11Plus())
9104 BoundCtrl = 1;
9105 return true;
9106 }
9107 return false;
9108}
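// Example (illustrative): on pre-gfx11 targets the two spellings below encode
// identically because of the conversion above.
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] bound_ctrl:0
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] bound_ctrl:1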
9109
9110void AMDGPUAsmParser::onBeginOfFile() {
9111 if (!getParser().getStreamer().getTargetStreamer() ||
9112 getSTI().getTargetTriple().getArch() == Triple::r600)
9113 return;
9114
9115 if (!getTargetStreamer().getTargetID())
9116 getTargetStreamer().initializeTargetID(getSTI(),
9117 getSTI().getFeatureString());
9118
9119 if (isHsaAbi(getSTI()))
9120 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9121}
9122
9123/// Parse AMDGPU specific expressions.
9124///
9125/// expr ::= or(expr, ...) |
9126/// max(expr, ...)
9127///
9128bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9129 using AGVK = AMDGPUMCExpr::VariantKind;
9130
9131 if (isToken(AsmToken::Identifier)) {
9132 StringRef TokenId = getTokenStr();
9133 AGVK VK = StringSwitch<AGVK>(TokenId)
9134 .Case("max", AGVK::AGVK_Max)
9135 .Case("or", AGVK::AGVK_Or)
9136 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9137 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9138 .Case("alignto", AGVK::AGVK_AlignTo)
9139 .Case("occupancy", AGVK::AGVK_Occupancy)
9140 .Default(AGVK::AGVK_None);
9141
9142 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9143       SmallVector<const MCExpr *, 4> Exprs;
9144       uint64_t CommaCount = 0;
9145 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9146 lex(); // Eat '('
9147 while (true) {
9148 if (trySkipToken(AsmToken::RParen)) {
9149 if (Exprs.empty()) {
9150 Error(getToken().getLoc(),
9151 "empty " + Twine(TokenId) + " expression");
9152 return true;
9153 }
9154 if (CommaCount + 1 != Exprs.size()) {
9155 Error(getToken().getLoc(),
9156 "mismatch of commas in " + Twine(TokenId) + " expression");
9157 return true;
9158 }
9159 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9160 return false;
9161 }
9162 const MCExpr *Expr;
9163 if (getParser().parseExpression(Expr, EndLoc))
9164 return true;
9165 Exprs.push_back(Expr);
9166 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9167 if (LastTokenWasComma)
9168 CommaCount++;
9169 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9170 Error(getToken().getLoc(),
9171 "unexpected token in " + Twine(TokenId) + " expression");
9172 return true;
9173 }
9174 }
9175 }
9176 }
9177 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9178}
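// Illustrative directives that exercise the or()/max() forms parsed above; the
// symbol names are hypothetical:
//   .set total_num_vgpr, max(kernel_a.num_vgpr, kernel_b.num_vgpr)
//   .set uses_vcc,       or(kernel_a.uses_vcc, kernel_b.uses_vcc)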
9179
9180ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9181 StringRef Name = getTokenStr();
9182 if (Name == "mul") {
9183 return parseIntWithPrefix("mul", Operands,
9184 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9185 }
9186
9187 if (Name == "div") {
9188 return parseIntWithPrefix("div", Operands,
9189 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9190 }
9191
9192 return ParseStatus::NoMatch;
9193}
9194
9195// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9196// the number of src operands present, then copies that bit into src0_modifiers.
9197static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9198 int Opc = Inst.getOpcode();
9199 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9200 if (OpSelIdx == -1)
9201 return;
9202
9203 int SrcNum;
9204 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9205 AMDGPU::OpName::src2};
9206 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9207 ++SrcNum)
9208 ;
9209 assert(SrcNum > 0);
9210
9211 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9212
9213 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9214 if (DstIdx == -1)
9215 return;
9216
9217 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9218 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9219 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9220 if (DstOp.isReg() &&
9221 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9222     if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
9223       ModVal |= SISrcMods::DST_OP_SEL;
9224 } else {
9225 if ((OpSel & (1 << SrcNum)) != 0)
9226 ModVal |= SISrcMods::DST_OP_SEL;
9227 }
9228 Inst.getOperand(ModIdx).setImm(ModVal);
9229}
9230
9231void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9232 const OperandVector &Operands) {
9233 cvtVOP3P(Inst, Operands);
9234 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9235}
9236
9237void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9238 OptionalImmIndexMap &OptionalIdx) {
9239 cvtVOP3P(Inst, Operands, OptionalIdx);
9240 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9241}
9242
9243static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9244 return
9245 // 1. This operand is input modifiers
9246 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9247 // 2. This is not last operand
9248 && Desc.NumOperands > (OpNum + 1)
9249 // 3. Next operand is register class
9250 && Desc.operands()[OpNum + 1].RegClass != -1
9251 // 4. Next register is not tied to any other operand
9252 && Desc.getOperandConstraint(OpNum + 1,
9254}
9255
9256void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9257 unsigned Opc = Inst.getOpcode();
9258 constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9259 AMDGPU::OpName::src2};
9260 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9261 AMDGPU::OpName::src1_modifiers,
9262 AMDGPU::OpName::src2_modifiers};
9263 for (int J = 0; J < 3; ++J) {
9264 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9265 if (OpIdx == -1)
9266 // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9267 // no src1. So continue instead of break.
9268 continue;
9269
9270 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9271 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9272
9273 if ((OpSel & (1 << J)) != 0)
9274 ModVal |= SISrcMods::OP_SEL_0;
9275 // op_sel[3] is encoded in src0_modifiers.
9276 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9277 ModVal |= SISrcMods::DST_OP_SEL;
9278
9279 Inst.getOperand(ModIdx).setImm(ModVal);
9280 }
9281}
9282
9283void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9284{
9285 OptionalImmIndexMap OptionalIdx;
9286 unsigned Opc = Inst.getOpcode();
9287
9288 unsigned I = 1;
9289 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9290 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9291 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9292 }
9293
9294 for (unsigned E = Operands.size(); I != E; ++I) {
9295 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9296     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9297       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9298 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9299 Op.isInterpAttrChan()) {
9300 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9301 } else if (Op.isImmModifier()) {
9302 OptionalIdx[Op.getImmTy()] = I;
9303 } else {
9304 llvm_unreachable("unhandled operand type");
9305 }
9306 }
9307
9308 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9309 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9310 AMDGPUOperand::ImmTyHigh);
9311
9312 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9313 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9314 AMDGPUOperand::ImmTyClamp);
9315
9316 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9317 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9318 AMDGPUOperand::ImmTyOModSI);
9319
9320 // Some v_interp instructions use op_sel[3] for dst.
9321 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9322 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9323 AMDGPUOperand::ImmTyOpSel);
9324 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9325 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9326
9327 cvtOpSelHelper(Inst, OpSel);
9328 }
9329}
9330
9331void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9332{
9333 OptionalImmIndexMap OptionalIdx;
9334 unsigned Opc = Inst.getOpcode();
9335
9336 unsigned I = 1;
9337 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9338 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9339 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9340 }
9341
9342 for (unsigned E = Operands.size(); I != E; ++I) {
9343 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9344     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9345       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9346 } else if (Op.isImmModifier()) {
9347 OptionalIdx[Op.getImmTy()] = I;
9348 } else {
9349 llvm_unreachable("unhandled operand type");
9350 }
9351 }
9352
9353 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9354
9355 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9356 if (OpSelIdx != -1)
9357 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9358
9359 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9360
9361 if (OpSelIdx == -1)
9362 return;
9363
9364 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9365 cvtOpSelHelper(Inst, OpSel);
9366}
9367
9368void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9369 const OperandVector &Operands) {
9370 OptionalImmIndexMap OptionalIdx;
9371 unsigned Opc = Inst.getOpcode();
9372 unsigned I = 1;
9373 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9374
9375 const MCInstrDesc &Desc = MII.get(Opc);
9376
9377 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9378 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9379
9380 for (unsigned E = Operands.size(); I != E; ++I) {
9381 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9382 int NumOperands = Inst.getNumOperands();
9383     // The order of operands in the MCInst and in the parsed operands differs.
9384     // Add dummy cbsz and blgp operands at the corresponding MCInst operand
9385     // indices so that the scale values are parsed correctly.
9386 if (NumOperands == CbszOpIdx) {
9387       Inst.addOperand(MCOperand::createImm(0));
9388       Inst.addOperand(MCOperand::createImm(0));
9389     }
9390 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9391 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9392 } else if (Op.isImmModifier()) {
9393 OptionalIdx[Op.getImmTy()] = I;
9394 } else {
9395 Op.addRegOrImmOperands(Inst, 1);
9396 }
9397 }
9398
9399 // Insert CBSZ and BLGP operands for F8F6F4 variants
9400 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9401 if (CbszIdx != OptionalIdx.end()) {
9402 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9403 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9404 }
9405
9406 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9407 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9408 if (BlgpIdx != OptionalIdx.end()) {
9409 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9410 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9411 }
9412
9413 // Add dummy src_modifiers
9414   Inst.addOperand(MCOperand::createImm(0));
9415   Inst.addOperand(MCOperand::createImm(0));
9416
9417 // Handle op_sel fields
9418
9419 unsigned OpSel = 0;
9420 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9421 if (OpselIdx != OptionalIdx.end()) {
9422 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9423 .getImm();
9424 }
9425
9426 unsigned OpSelHi = 0;
9427 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9428 if (OpselHiIdx != OptionalIdx.end()) {
9429 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9430 .getImm();
9431 }
9432 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9433 AMDGPU::OpName::src1_modifiers};
9434
9435 for (unsigned J = 0; J < 2; ++J) {
9436 unsigned ModVal = 0;
9437 if (OpSel & (1 << J))
9438 ModVal |= SISrcMods::OP_SEL_0;
9439 if (OpSelHi & (1 << J))
9440 ModVal |= SISrcMods::OP_SEL_1;
9441
9442 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9443 Inst.getOperand(ModIdx).setImm(ModVal);
9444 }
9445}
9446
9447void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9448 OptionalImmIndexMap &OptionalIdx) {
9449 unsigned Opc = Inst.getOpcode();
9450
9451 unsigned I = 1;
9452 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9453 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9454 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9455 }
9456
9457 for (unsigned E = Operands.size(); I != E; ++I) {
9458 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9459     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9460       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9461 } else if (Op.isImmModifier()) {
9462 OptionalIdx[Op.getImmTy()] = I;
9463 } else {
9464 Op.addRegOrImmOperands(Inst, 1);
9465 }
9466 }
9467
9468 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9469 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9470 AMDGPUOperand::ImmTyScaleSel);
9471
9472 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9473 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9474 AMDGPUOperand::ImmTyClamp);
9475
9476 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9477 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9478 Inst.addOperand(Inst.getOperand(0));
9479 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9480 AMDGPUOperand::ImmTyByteSel);
9481 }
9482
9483 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9484 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9485 AMDGPUOperand::ImmTyOModSI);
9486
9487 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9488 // it has src2 register operand that is tied to dst operand
9489 // we don't allow modifiers for this operand in assembler so src2_modifiers
9490 // should be 0.
9491 if (isMAC(Opc)) {
9492 auto *it = Inst.begin();
9493 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9494 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9495 ++it;
9496 // Copy the operand to ensure it's not invalidated when Inst grows.
9497 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9498 }
9499}
9500
9501void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9502 OptionalImmIndexMap OptionalIdx;
9503 cvtVOP3(Inst, Operands, OptionalIdx);
9504}
9505
9506void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9507 OptionalImmIndexMap &OptIdx) {
9508 const int Opc = Inst.getOpcode();
9509 const MCInstrDesc &Desc = MII.get(Opc);
9510
9511 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9512
9513 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9514 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9515 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9516 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9517 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9518 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9519 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9520 Inst.addOperand(Inst.getOperand(0));
9521 }
9522
9523 // Adding vdst_in operand is already covered for these DPP instructions in
9524 // cvtVOP3DPP.
9525 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
9526 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
9527 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
9528 Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
9529 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
9530 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
9531 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
9532 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
9533 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
9534 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
9535 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9536 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
9537 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
9538 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9539 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9540 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
9541 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
9542 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
9543 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
9544 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
9545 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
9546 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
9547 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
9548 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
9549 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
9550 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
9551 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
9552 Inst.addOperand(Inst.getOperand(0));
9553 }
9554
9555 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9556 if (BitOp3Idx != -1) {
9557 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9558 }
9559
9560 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9561 // instruction, and then figure out where to actually put the modifiers
9562
9563 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9564 if (OpSelIdx != -1) {
9565 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9566 }
9567
9568 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9569 if (OpSelHiIdx != -1) {
9570 int DefaultVal = IsPacked ? -1 : 0;
9571 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9572 DefaultVal);
9573 }
9574
9575 int MatrixAFMTIdx =
9576 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9577 if (MatrixAFMTIdx != -1) {
9578 addOptionalImmOperand(Inst, Operands, OptIdx,
9579 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9580 }
9581
9582 int MatrixBFMTIdx =
9583 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9584 if (MatrixBFMTIdx != -1) {
9585 addOptionalImmOperand(Inst, Operands, OptIdx,
9586 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9587 }
9588
9589 int MatrixAScaleIdx =
9590 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9591 if (MatrixAScaleIdx != -1) {
9592 addOptionalImmOperand(Inst, Operands, OptIdx,
9593 AMDGPUOperand::ImmTyMatrixAScale, 0);
9594 }
9595
9596 int MatrixBScaleIdx =
9597 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9598 if (MatrixBScaleIdx != -1) {
9599 addOptionalImmOperand(Inst, Operands, OptIdx,
9600 AMDGPUOperand::ImmTyMatrixBScale, 0);
9601 }
9602
9603 int MatrixAScaleFmtIdx =
9604 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9605 if (MatrixAScaleFmtIdx != -1) {
9606 addOptionalImmOperand(Inst, Operands, OptIdx,
9607 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9608 }
9609
9610 int MatrixBScaleFmtIdx =
9611 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9612 if (MatrixBScaleFmtIdx != -1) {
9613 addOptionalImmOperand(Inst, Operands, OptIdx,
9614 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9615 }
9616
9617 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9618 addOptionalImmOperand(Inst, Operands, OptIdx,
9619 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9620
9621 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9622 addOptionalImmOperand(Inst, Operands, OptIdx,
9623 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9624
9625 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9626 if (NegLoIdx != -1)
9627 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9628
9629 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9630 if (NegHiIdx != -1)
9631 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9632
9633 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9634 AMDGPU::OpName::src2};
9635 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9636 AMDGPU::OpName::src1_modifiers,
9637 AMDGPU::OpName::src2_modifiers};
9638
9639 unsigned OpSel = 0;
9640 unsigned OpSelHi = 0;
9641 unsigned NegLo = 0;
9642 unsigned NegHi = 0;
9643
9644 if (OpSelIdx != -1)
9645 OpSel = Inst.getOperand(OpSelIdx).getImm();
9646
9647 if (OpSelHiIdx != -1)
9648 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9649
9650 if (NegLoIdx != -1)
9651 NegLo = Inst.getOperand(NegLoIdx).getImm();
9652
9653 if (NegHiIdx != -1)
9654 NegHi = Inst.getOperand(NegHiIdx).getImm();
9655
9656 for (int J = 0; J < 3; ++J) {
9657 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9658 if (OpIdx == -1)
9659 break;
9660
9661 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9662
9663 if (ModIdx == -1)
9664 continue;
9665
9666 uint32_t ModVal = 0;
9667
9668 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9669 if (SrcOp.isReg() && getMRI()
9670 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9671 .contains(SrcOp.getReg())) {
9672 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9673 if (VGPRSuffixIsHi)
9674 ModVal |= SISrcMods::OP_SEL_0;
9675 } else {
9676 if ((OpSel & (1 << J)) != 0)
9677 ModVal |= SISrcMods::OP_SEL_0;
9678 }
9679
9680 if ((OpSelHi & (1 << J)) != 0)
9681 ModVal |= SISrcMods::OP_SEL_1;
9682
9683 if ((NegLo & (1 << J)) != 0)
9684 ModVal |= SISrcMods::NEG;
9685
9686 if ((NegHi & (1 << J)) != 0)
9687 ModVal |= SISrcMods::NEG_HI;
9688
9689 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9690 }
9691}
9692
9693void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9694 OptionalImmIndexMap OptIdx;
9695 cvtVOP3(Inst, Operands, OptIdx);
9696 cvtVOP3P(Inst, Operands, OptIdx);
9697}
9698
9699 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
9700                                   unsigned i, unsigned Opc,
9701 AMDGPU::OpName OpName) {
9702 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9703 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9704 else
9705 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9706}
9707
9708void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9709 unsigned Opc = Inst.getOpcode();
9710
9711 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9712 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9713 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9714 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9715 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9716
9717 OptionalImmIndexMap OptIdx;
9718 for (unsigned i = 5; i < Operands.size(); ++i) {
9719 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9720 OptIdx[Op.getImmTy()] = i;
9721 }
9722
9723 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9724 addOptionalImmOperand(Inst, Operands, OptIdx,
9725 AMDGPUOperand::ImmTyIndexKey8bit);
9726
9727 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9728 addOptionalImmOperand(Inst, Operands, OptIdx,
9729 AMDGPUOperand::ImmTyIndexKey16bit);
9730
9731 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9732 addOptionalImmOperand(Inst, Operands, OptIdx,
9733 AMDGPUOperand::ImmTyIndexKey32bit);
9734
9735 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9736 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9737
9738 cvtVOP3P(Inst, Operands, OptIdx);
9739}
9740
9741//===----------------------------------------------------------------------===//
9742// VOPD
9743//===----------------------------------------------------------------------===//
9744
9745ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9746 if (!hasVOPD(getSTI()))
9747 return ParseStatus::NoMatch;
9748
9749 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9750 SMLoc S = getLoc();
9751 lex();
9752 lex();
9753 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9754 SMLoc OpYLoc = getLoc();
9755 StringRef OpYName;
9756 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9757 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9758 return ParseStatus::Success;
9759 }
9760 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9761 }
9762 return ParseStatus::NoMatch;
9763}
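// Illustrative VOPD syntax (gfx11+): the '::' separator parsed above joins the
// X and Y components of a dual-issue instruction, e.g.
//   v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v3, v4, v5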
9764
9765// Create VOPD MCInst operands using parsed assembler operands.
9766void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9767 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9768
9769 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9770 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9771     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9772       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9773 return;
9774 }
9775 if (Op.isReg()) {
9776 Op.addRegOperands(Inst, 1);
9777 return;
9778 }
9779 if (Op.isImm()) {
9780 Op.addImmOperands(Inst, 1);
9781 return;
9782 }
9783 llvm_unreachable("Unhandled operand type in cvtVOPD");
9784 };
9785
9786 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9787
9788 // MCInst operands are ordered as follows:
9789 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9790
9791 for (auto CompIdx : VOPD::COMPONENTS) {
9792 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9793 }
9794
9795 for (auto CompIdx : VOPD::COMPONENTS) {
9796 const auto &CInfo = InstInfo[CompIdx];
9797 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9798 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9799 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9800 if (CInfo.hasSrc2Acc())
9801 addOp(CInfo.getIndexOfDstInParsedOperands());
9802 }
9803
9804 int BitOp3Idx =
9805 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9806 if (BitOp3Idx != -1) {
9807 OptionalImmIndexMap OptIdx;
9808 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9809 if (Op.isImm())
9810 OptIdx[Op.getImmTy()] = Operands.size() - 1;
9811
9812 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9813 }
9814}
9815
9816//===----------------------------------------------------------------------===//
9817// dpp
9818//===----------------------------------------------------------------------===//
9819
9820bool AMDGPUOperand::isDPP8() const {
9821 return isImmTy(ImmTyDPP8);
9822}
9823
9824bool AMDGPUOperand::isDPPCtrl() const {
9825 using namespace AMDGPU::DPP;
9826
9827 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9828 if (result) {
9829 int64_t Imm = getImm();
9830 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9831 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9832 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9833 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9834 (Imm == DppCtrl::WAVE_SHL1) ||
9835 (Imm == DppCtrl::WAVE_ROL1) ||
9836 (Imm == DppCtrl::WAVE_SHR1) ||
9837 (Imm == DppCtrl::WAVE_ROR1) ||
9838 (Imm == DppCtrl::ROW_MIRROR) ||
9839 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9840 (Imm == DppCtrl::BCAST15) ||
9841 (Imm == DppCtrl::BCAST31) ||
9842 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9843 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9844 }
9845 return false;
9846}
9847
9848//===----------------------------------------------------------------------===//
9849// mAI
9850//===----------------------------------------------------------------------===//
9851
9852bool AMDGPUOperand::isBLGP() const {
9853 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9854}
9855
9856bool AMDGPUOperand::isS16Imm() const {
9857 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9858}
9859
9860bool AMDGPUOperand::isU16Imm() const {
9861 return isImmLiteral() && isUInt<16>(getImm());
9862}
9863
9864//===----------------------------------------------------------------------===//
9865// dim
9866//===----------------------------------------------------------------------===//
9867
9868bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9869 // We want to allow "dim:1D" etc.,
9870 // but the initial 1 is tokenized as an integer.
9871 std::string Token;
9872 if (isToken(AsmToken::Integer)) {
9873 SMLoc Loc = getToken().getEndLoc();
9874 Token = std::string(getTokenStr());
9875 lex();
9876 if (getLoc() != Loc)
9877 return false;
9878 }
9879
9880 StringRef Suffix;
9881 if (!parseId(Suffix))
9882 return false;
9883 Token += Suffix;
9884
9885 StringRef DimId = Token;
9886 DimId.consume_front("SQ_RSRC_IMG_");
9887
9888 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9889 if (!DimInfo)
9890 return false;
9891
9892 Encoding = DimInfo->Encoding;
9893 return true;
9894}
9895
9896ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9897 if (!isGFX10Plus())
9898 return ParseStatus::NoMatch;
9899
9900 SMLoc S = getLoc();
9901
9902 if (!trySkipId("dim", AsmToken::Colon))
9903 return ParseStatus::NoMatch;
9904
9905 unsigned Encoding;
9906 SMLoc Loc = getLoc();
9907 if (!parseDimId(Encoding))
9908 return Error(Loc, "invalid dim value");
9909
9910 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9911 AMDGPUOperand::ImmTyDim));
9912 return ParseStatus::Success;
9913}
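// Illustrative dim operands (gfx10+; assumed example instruction):
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:1D     (the prefix is optional)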
9914
9915//===----------------------------------------------------------------------===//
9916// dpp
9917//===----------------------------------------------------------------------===//
9918
9919ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9920 SMLoc S = getLoc();
9921
9922 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9923 return ParseStatus::NoMatch;
9924
9925 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9926
9927 int64_t Sels[8];
9928
9929 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9930 return ParseStatus::Failure;
9931
9932 for (size_t i = 0; i < 8; ++i) {
9933 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9934 return ParseStatus::Failure;
9935
9936 SMLoc Loc = getLoc();
9937 if (getParser().parseAbsoluteExpression(Sels[i]))
9938 return ParseStatus::Failure;
9939 if (0 > Sels[i] || 7 < Sels[i])
9940 return Error(Loc, "expected a 3-bit value");
9941 }
9942
9943 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9944 return ParseStatus::Failure;
9945
9946 unsigned DPP8 = 0;
9947 for (size_t i = 0; i < 8; ++i)
9948 DPP8 |= (Sels[i] << (i * 3));
9949
9950 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9951 return ParseStatus::Success;
9952}
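// Illustrative dpp8 operand (gfx10+): eight 3-bit lane selects packed LSB-first
// into the DPP8 immediate, e.g. the lane reversal below.
//   v_mov_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0]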
9953
9954bool
9955AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9956 const OperandVector &Operands) {
9957 if (Ctrl == "row_newbcast")
9958 return isGFX90A();
9959
9960 if (Ctrl == "row_share" ||
9961 Ctrl == "row_xmask")
9962 return isGFX10Plus();
9963
9964 if (Ctrl == "wave_shl" ||
9965 Ctrl == "wave_shr" ||
9966 Ctrl == "wave_rol" ||
9967 Ctrl == "wave_ror" ||
9968 Ctrl == "row_bcast")
9969 return isVI() || isGFX9();
9970
9971 return Ctrl == "row_mirror" ||
9972 Ctrl == "row_half_mirror" ||
9973 Ctrl == "quad_perm" ||
9974 Ctrl == "row_shl" ||
9975 Ctrl == "row_shr" ||
9976 Ctrl == "row_ror";
9977}
9978
9979int64_t
9980AMDGPUAsmParser::parseDPPCtrlPerm() {
9981 // quad_perm:[%d,%d,%d,%d]
9982
9983 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9984 return -1;
9985
9986 int64_t Val = 0;
9987 for (int i = 0; i < 4; ++i) {
9988 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9989 return -1;
9990
9991 int64_t Temp;
9992 SMLoc Loc = getLoc();
9993 if (getParser().parseAbsoluteExpression(Temp))
9994 return -1;
9995 if (Temp < 0 || Temp > 3) {
9996 Error(Loc, "expected a 2-bit value");
9997 return -1;
9998 }
9999
10000 Val += (Temp << i * 2);
10001 }
10002
10003 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10004 return -1;
10005
10006 return Val;
10007}
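// Illustrative quad_perm values: [0,1,2,3] is the identity and encodes to 0xe4
// (also the default DppCtrl used in cvtVOP3DPP), while [3,2,1,0] reverses each
// quad, e.g.
//   v_mov_b32_dpp v0, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf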
10008
10009int64_t
10010AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10011 using namespace AMDGPU::DPP;
10012
10013 // sel:%d
10014
10015 int64_t Val;
10016 SMLoc Loc = getLoc();
10017
10018 if (getParser().parseAbsoluteExpression(Val))
10019 return -1;
10020
10021 struct DppCtrlCheck {
10022 int64_t Ctrl;
10023 int Lo;
10024 int Hi;
10025 };
10026
10027 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10028 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10029 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10030 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10031 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10032 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10033 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10034 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10035 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10036 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10037 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10038 .Default({-1, 0, 0});
10039
10040 bool Valid;
10041 if (Check.Ctrl == -1) {
10042 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10043 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10044 } else {
10045 Valid = Check.Lo <= Val && Val <= Check.Hi;
10046 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10047 }
10048
10049 if (!Valid) {
10050 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10051 return -1;
10052 }
10053
10054 return Val;
10055}
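// Illustrative sel-style controls accepted above: row_shl:1 .. row_shl:15,
// row_share:0 .. row_share:15 (gfx10+), and row_bcast:15 / row_bcast:31, which
// map to DppCtrl::BCAST15 / DppCtrl::BCAST31.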
10056
10057ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10058 using namespace AMDGPU::DPP;
10059
10060 if (!isToken(AsmToken::Identifier) ||
10061 !isSupportedDPPCtrl(getTokenStr(), Operands))
10062 return ParseStatus::NoMatch;
10063
10064 SMLoc S = getLoc();
10065 int64_t Val = -1;
10066 StringRef Ctrl;
10067
10068 parseId(Ctrl);
10069
10070 if (Ctrl == "row_mirror") {
10071 Val = DppCtrl::ROW_MIRROR;
10072 } else if (Ctrl == "row_half_mirror") {
10073 Val = DppCtrl::ROW_HALF_MIRROR;
10074 } else {
10075 if (skipToken(AsmToken::Colon, "expected a colon")) {
10076 if (Ctrl == "quad_perm") {
10077 Val = parseDPPCtrlPerm();
10078 } else {
10079 Val = parseDPPCtrlSel(Ctrl);
10080 }
10081 }
10082 }
10083
10084 if (Val == -1)
10085 return ParseStatus::Failure;
10086
10087 Operands.push_back(
10088 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10089 return ParseStatus::Success;
10090}
10091
10092void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10093 bool IsDPP8) {
10094 OptionalImmIndexMap OptionalIdx;
10095 unsigned Opc = Inst.getOpcode();
10096 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10097
10098 // MAC instructions are special because they have 'old'
10099 // operand which is not tied to dst (but assumed to be).
10100 // They also have dummy unused src2_modifiers.
10101 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10102 int Src2ModIdx =
10103 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10104 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10105 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10106
10107 unsigned I = 1;
10108 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10109 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10110 }
10111
10112 int Fi = 0;
10113 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10114 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10115 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10116 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10117 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
10118
10119 for (unsigned E = Operands.size(); I != E; ++I) {
10120
10121 if (IsMAC) {
10122 int NumOperands = Inst.getNumOperands();
10123 if (OldIdx == NumOperands) {
10124 // Handle old operand
10125 constexpr int DST_IDX = 0;
10126 Inst.addOperand(Inst.getOperand(DST_IDX));
10127 } else if (Src2ModIdx == NumOperands) {
10128 // Add unused dummy src2_modifiers
10129         Inst.addOperand(MCOperand::createImm(0));
10130       }
10131 }
10132
10133 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10134 Inst.addOperand(Inst.getOperand(0));
10135 }
10136
10137 if (IsVOP3CvtSrDpp) {
10138 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10139         Inst.addOperand(MCOperand::createImm(0));
10140         Inst.addOperand(MCOperand::createReg(MCRegister()));
10141 }
10142 }
10143
10144     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10145                                             MCOI::TIED_TO);
10146 if (TiedTo != -1) {
10147 assert((unsigned)TiedTo < Inst.getNumOperands());
10148 // handle tied old or src2 for MAC instructions
10149 Inst.addOperand(Inst.getOperand(TiedTo));
10150 }
10151 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10152 // Add the register arguments
10153 if (IsDPP8 && Op.isDppFI()) {
10154 Fi = Op.getImm();
10155 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10156 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10157 } else if (Op.isReg()) {
10158 Op.addRegOperands(Inst, 1);
10159 } else if (Op.isImm() &&
10160 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10161 Op.addImmOperands(Inst, 1);
10162 } else if (Op.isImm()) {
10163 OptionalIdx[Op.getImmTy()] = I;
10164 } else {
10165 llvm_unreachable("unhandled operand type");
10166 }
10167 }
10168
10169 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10170 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10171 AMDGPUOperand::ImmTyClamp);
10172
10173 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10174 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10175 Inst.addOperand(Inst.getOperand(0));
10176 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10177 AMDGPUOperand::ImmTyByteSel);
10178 }
10179
10180 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10181 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10182
10183 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10184 cvtVOP3P(Inst, Operands, OptionalIdx);
10185 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10186 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10187 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10188 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10189 }
10190
10191 if (IsDPP8) {
10192 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10193 using namespace llvm::AMDGPU::DPP;
10194 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10195 } else {
10196 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10197 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10198 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10199 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10200
10201 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10202 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10203 AMDGPUOperand::ImmTyDppFI);
10204 }
10205}
10206
10207void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10208 OptionalImmIndexMap OptionalIdx;
10209
10210 unsigned I = 1;
10211 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10212 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10213 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10214 }
10215
10216 int Fi = 0;
10217 for (unsigned E = Operands.size(); I != E; ++I) {
10218     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10219                                             MCOI::TIED_TO);
10220 if (TiedTo != -1) {
10221 assert((unsigned)TiedTo < Inst.getNumOperands());
10222 // handle tied old or src2 for MAC instructions
10223 Inst.addOperand(Inst.getOperand(TiedTo));
10224 }
10225 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10226 // Add the register arguments
10227 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10228 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10229 // Skip it.
10230 continue;
10231 }
10232
10233 if (IsDPP8) {
10234 if (Op.isDPP8()) {
10235 Op.addImmOperands(Inst, 1);
10236 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10237 Op.addRegWithFPInputModsOperands(Inst, 2);
10238 } else if (Op.isDppFI()) {
10239 Fi = Op.getImm();
10240 } else if (Op.isReg()) {
10241 Op.addRegOperands(Inst, 1);
10242 } else {
10243 llvm_unreachable("Invalid operand type");
10244 }
10245 } else {
10246       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10247         Op.addRegWithFPInputModsOperands(Inst, 2);
10248 } else if (Op.isReg()) {
10249 Op.addRegOperands(Inst, 1);
10250 } else if (Op.isDPPCtrl()) {
10251 Op.addImmOperands(Inst, 1);
10252 } else if (Op.isImm()) {
10253 // Handle optional arguments
10254 OptionalIdx[Op.getImmTy()] = I;
10255 } else {
10256 llvm_unreachable("Invalid operand type");
10257 }
10258 }
10259 }
10260
10261 if (IsDPP8) {
10262 using namespace llvm::AMDGPU::DPP;
10263 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10264 } else {
10265 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10266 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10267 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10268 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10269 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10270 AMDGPUOperand::ImmTyDppFI);
10271 }
10272 }
10273}
10274
10275//===----------------------------------------------------------------------===//
10276// sdwa
10277//===----------------------------------------------------------------------===//
10278
10279ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10280 StringRef Prefix,
10281 AMDGPUOperand::ImmTy Type) {
10282 return parseStringOrIntWithPrefix(
10283 Operands, Prefix,
10284 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10285 Type);
10286}
10287
10288ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10289 return parseStringOrIntWithPrefix(
10290 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10291 AMDGPUOperand::ImmTySDWADstUnused);
10292}
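// Illustrative SDWA operands built from the selector lists above (VI example):
//   v_add_f32_sdwa v0, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD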
10293
10294void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10295 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
10296}
10297
10298void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10299 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
10300}
10301
10302void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10303 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
10304}
10305
10306void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10307 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
10308}
10309
10310void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10311 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
10312}
10313
10314void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10315 uint64_t BasicInstType,
10316 bool SkipDstVcc,
10317 bool SkipSrcVcc) {
10318 using namespace llvm::AMDGPU::SDWA;
10319
10320 OptionalImmIndexMap OptionalIdx;
10321 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10322 bool SkippedVcc = false;
10323
10324 unsigned I = 1;
10325 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10326 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10327 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10328 }
10329
10330 for (unsigned E = Operands.size(); I != E; ++I) {
10331 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10332 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10333 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10334 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10335 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10336 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10337 // Skip VCC only if we didn't skip it on previous iteration.
10338 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10339 if (BasicInstType == SIInstrFlags::VOP2 &&
10340 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10341 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10342 SkippedVcc = true;
10343 continue;
10344 }
10345 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10346 SkippedVcc = true;
10347 continue;
10348 }
10349 }
10350     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10351       Op.addRegOrImmWithInputModsOperands(Inst, 2);
10352 } else if (Op.isImm()) {
10353 // Handle optional arguments
10354 OptionalIdx[Op.getImmTy()] = I;
10355 } else {
10356 llvm_unreachable("Invalid operand type");
10357 }
10358 SkippedVcc = false;
10359 }
10360
10361 const unsigned Opc = Inst.getOpcode();
10362 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10363 Opc != AMDGPU::V_NOP_sdwa_vi) {
10364     // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
10365 switch (BasicInstType) {
10366 case SIInstrFlags::VOP1:
10367 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10368 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10369 AMDGPUOperand::ImmTyClamp, 0);
10370
10371 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10372 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10373 AMDGPUOperand::ImmTyOModSI, 0);
10374
10375 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10376 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10377 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10378
10379 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10380 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10381 AMDGPUOperand::ImmTySDWADstUnused,
10382 DstUnused::UNUSED_PRESERVE);
10383
10384 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10385 break;
10386
10387 case SIInstrFlags::VOP2:
10388 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10389 AMDGPUOperand::ImmTyClamp, 0);
10390
10391 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10392 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10393
10394 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10395 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10396 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10397 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10398 break;
10399
10400 case SIInstrFlags::VOPC:
10401 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10402 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10403 AMDGPUOperand::ImmTyClamp, 0);
10404 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10405 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10406 break;
10407
10408 default:
10409 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10410 }
10411 }
10412
10413 // special case v_mac_{f16, f32}:
10414 // it has src2 register operand that is tied to dst operand
10415 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10416 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10417 auto *it = Inst.begin();
10418 std::advance(
10419 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10420 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10421 }
10422}
10423
10424/// Force static initialization.
10425extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10426 LLVMInitializeAMDGPUAsmParser() {
10427   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
10428   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
10429 }
10430
10431#define GET_MATCHER_IMPLEMENTATION
10432#define GET_MNEMONIC_SPELL_CHECKER
10433#define GET_MNEMONIC_CHECKER
10434#include "AMDGPUGenAsmMatcher.inc"
10435
10436ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10437 unsigned MCK) {
10438 switch (MCK) {
10439 case MCK_addr64:
10440 return parseTokenOp("addr64", Operands);
10441 case MCK_done:
10442 return parseTokenOp("done", Operands);
10443 case MCK_idxen:
10444 return parseTokenOp("idxen", Operands);
10445 case MCK_lds:
10446 return parseTokenOp("lds", Operands);
10447 case MCK_offen:
10448 return parseTokenOp("offen", Operands);
10449 case MCK_off:
10450 return parseTokenOp("off", Operands);
10451 case MCK_row_95_en:
10452 return parseTokenOp("row_en", Operands);
10453 case MCK_gds:
10454 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10455 case MCK_tfe:
10456 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10457 }
10458 return tryCustomParseOperand(Operands, MCK);
10459}
10460
10461 // This function should be defined after the auto-generated include so that the
10462 // MatchClassKind enum is defined.
10463unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10464 unsigned Kind) {
10465 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10466 // But MatchInstructionImpl() expects to see a token and fails to validate the
10467 // operand. This method checks whether we were given an immediate operand but
10468 // were expected to produce the corresponding token.
10469 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10470 switch (Kind) {
10471 case MCK_addr64:
10472 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10473 case MCK_gds:
10474 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10475 case MCK_lds:
10476 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10477 case MCK_idxen:
10478 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10479 case MCK_offen:
10480 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10481 case MCK_tfe:
10482 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10483 case MCK_SSrc_b32:
10484 // When operands have expression values, they will return true for isToken,
10485 // because it is not possible to distinguish between a token and an
10486 // expression at parse time. MatchInstructionImpl() will always try to
10487 // match an operand as a token, when isToken returns true, and when the
10488 // name of the expression is not a valid token, the match will fail,
10489 // so we need to handle it here.
10490 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10491 case MCK_SSrc_f32:
10492 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10493 case MCK_SOPPBrTarget:
10494 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10495 case MCK_VReg32OrOff:
10496 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10497 case MCK_InterpSlot:
10498 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10499 case MCK_InterpAttr:
10500 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10501 case MCK_InterpAttrChan:
10502 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10503 case MCK_SReg_64:
10504 case MCK_SReg_64_XEXEC:
10505 // Null is defined as a 32-bit register but
10506 // it should also be enabled with 64-bit operands or larger.
10507 // The following code enables it for SReg_64 and larger operands
10508 // used as source and destination. Remaining source
10509 // operands are handled in isInlinableImm.
10510 case MCK_SReg_96:
10511 case MCK_SReg_128:
10512 case MCK_SReg_256:
10513 case MCK_SReg_512:
10514 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10515 default:
10516 return Match_InvalidOperand;
10517 }
10518}
10519
10520//===----------------------------------------------------------------------===//
10521// endpgm
10522//===----------------------------------------------------------------------===//
10523
10524ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10525 SMLoc S = getLoc();
10526 int64_t Imm = 0;
10527
10528 if (!parseExpr(Imm)) {
10529 // The operand is optional, if not present default to 0
10530 Imm = 0;
10531 }
10532
10533 if (!isUInt<16>(Imm))
10534 return Error(S, "expected a 16-bit value");
10535
10536 Operands.push_back(
10537 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10538 return ParseStatus::Success;
10539}
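// Illustrative forms (assumed): the immediate parsed above is optional and
// defaults to 0.
//   s_endpgm
//   s_endpgm 3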
10540
10541bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10542
10543//===----------------------------------------------------------------------===//
10544// Split Barrier
10545//===----------------------------------------------------------------------===//
10546
10547bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
BinaryOperator * Mul
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6057
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
const MCExpr * getExpr() const
Definition MCInst.h:118
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
constexpr bool isValid() const
Definition MCRegister.h:76
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:95
Represents a location in source code.
Definition SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:36
constexpr const char * getPointer() const
Definition SMLoc.h:34
constexpr bool isValid() const
Definition SMLoc.h:29
SMLoc Start
Definition SMLoc.h:50
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:854
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:657
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:573
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:611
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:637
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:273
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:231
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:202
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:209
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:219
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:224
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:206
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:201
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:218
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:216
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:205
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:236
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:237
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:212
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:204
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:217
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:213
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:238
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:220
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:203
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:228
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1425
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:62
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:570
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1113
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:252
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:267
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:331
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:159
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:164
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:405
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1760
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
#define N
RegisterKind Kind
StringLiteral Name
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
static LLVM_ABI const fltSemantics & IEEEhalf() LLVM_READNONE
Definition APFloat.cpp:264
static LLVM_ABI const fltSemantics & BFloat() LLVM_READNONE
Definition APFloat.cpp:265
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...