enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {

    int64_t getIntModifiersOperand() const {

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers()) &&
             "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers())
        return getFPModifiersOperand();
      if (hasIntModifiers())
        return getIntModifiersOperand();

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
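    // Note (added): a minimal illustration of how these flags map onto the
    // assembly syntax, assuming the usual AMDGPU source-modifier notation:
    //   v_add_f32 v0, -|v1|, v2          ; Neg and Abs set on src0 (FP mods)
    //   v_add_u16_sdwa v0, sext(v1), v2  ; Sext set on src0 (integer mod)
    // getModifiersOperand() folds whichever group is active into a single
    // immediate operand; the assert above guarantees the two groups are never
    // mixed on one operand.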
  ImmTyMatrixAScaleFmt,
  ImmTyMatrixBScaleFmt,

  mutable int MCOpIdx = -1;

  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

    return isRegOrInline(RCID, type) || isLiteralImm(type);

  bool isRegOrImmWithInt16InputMods() const {

  template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);

  bool isRegOrImmWithInt32InputMods() const {

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {

  bool isRegOrImmWithFP16InputMods() const {

  template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);

  bool isRegOrImmWithFP32InputMods() const {

  bool isRegOrImmWithFP64InputMods() const {

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrInlineImmWithFP64InputMods() const {
    return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }

  bool isVRegWithFP32InputMods() const {
    return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
  }

  bool isVRegWithFP64InputMods() const {
    return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
  }

  bool isPackedFP16InputMods() const {

  bool isPackedVGPRFP32InputMods() const {

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
  bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
  bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
  bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
  bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
  bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
  bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
  bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
  bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {

  bool isSSrc_b64() const {
    return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
           (((const MCTargetAsmParser *)AsmParser)
                ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {

  bool isSSrcV2FP32() const {

  bool isSCSrcV2FP32() const {

  bool isSSrcV2INT32() const {

  bool isSCSrcV2INT32() const {
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrc_b32_Lo256() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
  }

  bool isVCSrc_b64_Lo256() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
  }

  bool isVCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcT_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrc_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcT_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcT_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }

  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }

  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }

  bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }

  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const { return isVCSrc_f64(); }

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrc_v2b32() const { return isVCSrc_b64(); }

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }

  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

  bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }

  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }

  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }

  bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
  }

  bool isVISrc_64_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_512_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512_b32();
  }

  bool isVISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024_b32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128_b32();
  }

  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }

  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512_b32();
  }

  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isKImmFP64() const { return isLiteralImm(MVT::f64); }

  bool isMem() const override {

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [this, P]() { return P(*this); };
  }
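  // Note (added): a minimal sketch of how getPredicate is typically consumed.
  // It wraps an operand test into a nullary callable bound to this operand, so
  // lookup helpers (for example the parser's getOperandLoc overloads) can take
  // either form of predicate uniformly:
  //   auto IsClamp = Op.getPredicate(
  //       [](const AMDGPUOperand &O) { return O.isImmTy(ImmTyClamp); });
  //   if (IsClamp()) { /* operand is the clamp modifier */ }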
    return StringRef(Tok.Data, Tok.Length);

  void setImm(int64_t Val) {

  ImmTy getImmTy() const {

  MCRegister getReg() const override {

  SMLoc getStartLoc() const override {

  SMLoc getEndLoc() const override {

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  int getMCOpIdx() const { return MCOpIdx; }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
      addRegOperands(Inst, N);
      addImmOperands(Inst, N);

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
      addRegOperands(Inst, N);
      addImmOperands(Inst, N, false);

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    addRegOperands(Inst, N);

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  static void printImmTy(raw_ostream &OS, ImmTy Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyIndexKey32bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyBitOp3: OS << "BitOp3"; break;
    case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
    case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
    case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
    case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
    case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
    case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
    case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
    case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
    case ImmTyScaleSel: OS << "ScaleSel"; break;
    case ImmTyByteSel: OS << "ByteSel"; break;

  void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
         << " mods: " << Reg.Mods << '>';
    if (getImmTy() != ImmTyNone) {
      OS << " type: "; printImmTy(OS, getImmTy());
    OS << " mods: " << Imm.Mods << '>';

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      MCRegister Reg, SMLoc S, SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = Reg;
    Op->Reg.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
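  // Note (added): a minimal sketch of how these static factories are consumed
  // while parsing, assuming the usual push-into-OperandVector pattern used
  // later in this file:
  //   Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, Loc));
  //   Operands.push_back(AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc));
  // Each factory fills in the kind-specific union member plus the source
  // locations, and the matcher later calls addRegOperands()/addImmOperands()
  // to turn the parsed operand into MCOperands on the MCInst.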
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
#define GET_REGISTER_MATCHER
#include "AMDGPUGenAsmMatcher.inc"
#undef GET_REGISTER_MATCHER
#undef GET_SUBTARGET_FEATURE_NAME

class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
        Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
        Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
                               VgprIndexUnusedMin);

  void usesAgprAt(int i) {
    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
        Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
                               VgprIndexUnusedMin);

  KernelScopeInfo() = default;

    MSTI = Ctx->getSubtargetInfo();
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
      usesAgprAt(AgprIndexUnusedMin = -1);

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
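  // Note (added): a small worked example of the index math above. For a
  // 64-bit VGPR pair starting at v4 (DwordRegIndex = 4, RegWidth = 64),
  // divideCeil(64, 32) - 1 == 1, so usesVgprAt(5) is called and
  // VgprIndexUnusedMin becomes 6, i.e. the kernel is recorded as using
  // v0..v5.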
  MCAsmParser &Parser;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  const unsigned HwMode;

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
    int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);

  void createConstantSymbol(StringRef Id, int64_t Val);

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);

  bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
                          const MCExpr *FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          const MCExpr *NextFreeVGPR, SMRange VGPRRange,
                          const MCExpr *NextFreeSGPR, SMRange SGPRRange,
                          const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                          SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
  MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
                           unsigned SubReg, unsigned RegWidth, SMLoc Loc);

  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,

    OperandMode_Default,

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser),
        HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);

      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);

      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);

      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }

  bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool has64BitLiterals() const {
    return getFeatureBits()[AMDGPU::Feature64BitLiterals];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasTrue16Insts() const {
    return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
  }

    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];

  bool hasSGPR102_SGPR103() const {

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  bool hasGloballyAddressableScratch() const {
    return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

    return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();

  const MCRegisterInfo *getMRI() const {

  const MCInstrInfo *getMII() const {

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,

  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      std::function<bool(int64_t &)> ConvertResult = nullptr);

  ParseStatus parseOperandArrayWithPrefix(
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      bool (*ConvertResult)(int64_t &) = nullptr);

      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;

  ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
                            ArrayRef<const char *> Ids,
                            ArrayRef<const char *> Ids,
                            AMDGPUOperand::ImmTy Type);

  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
                             bool AllowImm = true);
                             bool AllowImm = true);
                               AMDGPUOperand::ImmTy ImmTy);
                               AMDGPUOperand::ImmTy Type);
                               AMDGPUOperand::ImmTy Type);
                               AMDGPUOperand::ImmTy Type);

  ParseStatus parseDfmtNfmt(int64_t &Format);
  ParseStatus parseUfmt(int64_t &Format);
  ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
  ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
  ParseStatus parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  bool parseCnt(int64_t &IntVal);
  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  bool parseDelay(int64_t &Delay);

  struct OperandInfoTy {
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Val) : Val(Val) {}

  struct StructuredOpField : OperandInfoTy {
    bool IsDefined = false;

    StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
        : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
    virtual ~StructuredOpField() = default;

    bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
      Parser.Error(Loc, "invalid " + Desc + ": " + Err);

    virtual bool validate(AMDGPUAsmParser &Parser) const {
        return Error(Parser, "not supported on this GPU");
        return Error(Parser, "only " + Twine(Width) + "-bit values are legal");

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
                             OperandInfoTy &Width);

  static SMLoc getLaterLoc(SMLoc a, SMLoc b);

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand &)> Test,
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,

  bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
  std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
  bool tryVOPD(const MCInst &Inst);
  bool tryVOPD3(const MCInst &Inst);
  bool tryAnotherVOPDEncoding(const MCInst &Inst);

  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateTensorR128(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateTrue16OpSel(const MCInst &Inst);
  bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
  bool validateVccOperand(MCRegister Reg) const;
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateDivScale(const MCInst &Inst);
                        const unsigned CPol);

  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  StringRef getId() const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);

  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  StringRef getTokenStr() const;
  AsmToken peekToken(bool ShouldSkipSpace = true);
  SMLoc getLoc() const;

  void onBeginOfFile() override;
  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;

  bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
                           const unsigned MaxVal, const Twine &ErrMsg,
  bool parseSwizzleOperands(const unsigned OpNum, int64_t *Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);

  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);
  bool parseSwizzleFFT(int64_t &Imm);
  bool parseSwizzleRotate(int64_t &Imm);

  int64_t parseGPRIdxMacro();

                    OptionalImmIndexMap &OptionalIdx);
                    OptionalImmIndexMap &OptionalIdx);
                    OptionalImmIndexMap &OptionalIdx);
  void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);

  bool parseDimId(unsigned &Encoding);
  bool convertDppBoundCtrl(int64_t &BoundCtrl);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
                   bool IsDPP8 = false);
                                    AMDGPUOperand::ImmTy Type);
                uint64_t BasicInstType,
                bool SkipDstVcc = false,
                bool SkipSrcVcc = false);
bool AMDGPUOperand::isInlinableImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {
  if (type == MVT::f64 || type == MVT::i64) {
                                 AsmParser->hasInv2PiInlineImm());
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
                           APFloat::rmNearestTiesToEven, &Lost);
      uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
                                    AsmParser->hasInv2PiInlineImm());
        static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
  if (type == MVT::f64 || type == MVT::i64) {
                                 AsmParser->hasInv2PiInlineImm());
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
      static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
      AsmParser->hasInv2PiInlineImm());

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {
      (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
  if (type == MVT::f64 && hasFPModifiers()) {
  if (type == MVT::f64) {
  if (type == MVT::i64) {
  MVT ExpectedType = (type == MVT::v2f16)   ? MVT::f16
                     : (type == MVT::v2i16) ? MVT::f32
                     : (type == MVT::v2f32) ? MVT::f32
  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);

template <bool IsFake16>
bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
  return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
                             : AMDGPU::VGPR_16_Lo128RegClassID);
}

template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
  return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
                             : AMDGPU::VGPR_16RegClassID);
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
  if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
                     (AsmParser->isWave32() && isSCSrc_b32()));

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N,
                                   bool ApplyModifiers) const {
    addLiteralImmOperand(Inst, Imm.Val,
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
    assert(!isImmTy(ImmTyNone) || !hasModifiers());

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
                                         bool ApplyModifiers) const {
  const auto &InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  if (ApplyModifiers) {
    Val = applyInputFPModifiers(Val, Size);

  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
  bool CanUse64BitLiterals =
      AsmParser->has64BitLiterals() &&
  MCContext &Ctx = AsmParser->getContext();
                                         AsmParser->hasInv2PiInlineImm())) {
      bool HasMandatoryLiteral =
      if (Literal.getLoBits(32) != 0 &&
          (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
          !HasMandatoryLiteral) {
        const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
            "Can't encode literal as exact 64-bit floating-point operand. "
            "Low 32-bits will be set to zero");
        Val &= 0xffffffff00000000u;
          CanUse64BitLiterals && Lo_32(Val) != 0) {
      if (CanUse64BitLiterals && Lo_32(Val) != 0) {
      if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
                            APFloat::rmNearestTiesToEven, &lost);
      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      if (!AsmParser->has64BitLiterals() ||
          getModifiers().Lit == LitModifier::Lit)
      if (!AsmParser->has64BitLiterals()) {
        Val = static_cast<uint64_t>(Val) << 32;
      if (getModifiers().Lit == LitModifier::Lit ||
          (getModifiers().Lit != LitModifier::Lit64 &&
        Val = static_cast<uint64_t>(Val) << 32;
      if (CanUse64BitLiterals && Lo_32(Val) != 0) {
          getModifiers().Lit != LitModifier::Lit64)
    if (CanUse64BitLiterals && Lo_32(Val) != 0) {

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {

bool AMDGPUOperand::isInlineValue() const {
void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {

  if (Is == IS_VGPR) {
      return AMDGPU::VGPR_32RegClassID;
      return AMDGPU::VReg_64RegClassID;
      return AMDGPU::VReg_96RegClassID;
      return AMDGPU::VReg_128RegClassID;
      return AMDGPU::VReg_160RegClassID;
      return AMDGPU::VReg_192RegClassID;
      return AMDGPU::VReg_224RegClassID;
      return AMDGPU::VReg_256RegClassID;
      return AMDGPU::VReg_288RegClassID;
      return AMDGPU::VReg_320RegClassID;
      return AMDGPU::VReg_352RegClassID;
      return AMDGPU::VReg_384RegClassID;
      return AMDGPU::VReg_512RegClassID;
      return AMDGPU::VReg_1024RegClassID;
  } else if (Is == IS_TTMP) {
      return AMDGPU::TTMP_32RegClassID;
      return AMDGPU::TTMP_64RegClassID;
      return AMDGPU::TTMP_128RegClassID;
      return AMDGPU::TTMP_256RegClassID;
      return AMDGPU::TTMP_512RegClassID;
  } else if (Is == IS_SGPR) {
      return AMDGPU::SGPR_32RegClassID;
      return AMDGPU::SGPR_64RegClassID;
      return AMDGPU::SGPR_96RegClassID;
      return AMDGPU::SGPR_128RegClassID;
      return AMDGPU::SGPR_160RegClassID;
      return AMDGPU::SGPR_192RegClassID;
      return AMDGPU::SGPR_224RegClassID;
      return AMDGPU::SGPR_256RegClassID;
      return AMDGPU::SGPR_288RegClassID;
      return AMDGPU::SGPR_320RegClassID;
      return AMDGPU::SGPR_352RegClassID;
      return AMDGPU::SGPR_384RegClassID;
      return AMDGPU::SGPR_512RegClassID;
  } else if (Is == IS_AGPR) {
      return AMDGPU::AGPR_32RegClassID;
      return AMDGPU::AReg_64RegClassID;
      return AMDGPU::AReg_96RegClassID;
      return AMDGPU::AReg_128RegClassID;
      return AMDGPU::AReg_160RegClassID;
      return AMDGPU::AReg_192RegClassID;
      return AMDGPU::AReg_224RegClassID;
      return AMDGPU::AReg_256RegClassID;
      return AMDGPU::AReg_288RegClassID;
      return AMDGPU::AReg_320RegClassID;
      return AMDGPU::AReg_352RegClassID;
      return AMDGPU::AReg_384RegClassID;
      return AMDGPU::AReg_512RegClassID;
      return AMDGPU::AReg_1024RegClassID;

      .Case("exec", AMDGPU::EXEC)
      .Case("vcc", AMDGPU::VCC)
      .Case("flat_scratch", AMDGPU::FLAT_SCR)
      .Case("xnack_mask", AMDGPU::XNACK_MASK)
      .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
      .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
      .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("lds_direct", AMDGPU::LDS_DIRECT)
      .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
      .Case("m0", AMDGPU::M0)
      .Case("vccz", AMDGPU::SRC_VCCZ)
      .Case("src_vccz", AMDGPU::SRC_VCCZ)
      .Case("execz", AMDGPU::SRC_EXECZ)
      .Case("src_execz", AMDGPU::SRC_EXECZ)
      .Case("scc", AMDGPU::SRC_SCC)
      .Case("src_scc", AMDGPU::SRC_SCC)
      .Case("tba", AMDGPU::TBA)
      .Case("tma", AMDGPU::TMA)
      .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
      .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
      .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
      .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
      .Case("vcc_lo", AMDGPU::VCC_LO)
      .Case("vcc_hi", AMDGPU::VCC_HI)
      .Case("exec_lo", AMDGPU::EXEC_LO)
      .Case("exec_hi", AMDGPU::EXEC_HI)
      .Case("tma_lo", AMDGPU::TMA_LO)
      .Case("tma_hi", AMDGPU::TMA_HI)
      .Case("tba_lo", AMDGPU::TBA_LO)
      .Case("tba_hi", AMDGPU::TBA_HI)
      .Case("pc", AMDGPU::PC_REG)
      .Case("null", AMDGPU::SGPR_NULL)
bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R)
    return true;
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();

bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
  return ParseRegister(Reg, StartLoc, EndLoc, false);

ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
  bool Result = ParseRegister(Reg, StartLoc, EndLoc, true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();

bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind,
                                            MCRegister Reg1, SMLoc Loc) {
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
    Error(Loc, "register does not fit in the list");
    if (Reg1 != Reg + RegWidth / 32) {
      Error(Loc, "registers in a list must have consecutive indices");

    {{"ttmp"}, IS_TTMP},

  return Kind == IS_VGPR ||

    if (Str.starts_with(Reg.Name))

  return !Str.getAsInteger(10, Num);

AMDGPUAsmParser::isRegister(const AsmToken &Token,
                            const AsmToken &NextToken) const {
  StringRef RegSuffix = Str.substr(RegName.size());
  if (!RegSuffix.empty()) {

AMDGPUAsmParser::isRegister()
  return isRegister(getToken(), peekToken());

MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
                                          unsigned SubReg, unsigned RegWidth,
  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return MCRegister();

  unsigned RegIdx = RegNum / AlignSize;
    Error(Loc, "invalid or unsupported register size");
    return MCRegister();

  const MCRegisterClass RC = TRI->getRegClass(RCID);
  if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;

  if (RegKind == IS_VGPR && !isGFX1250() && RegIdx + RegWidth / 32 > 256) {
    Error(Loc, "register index is out of range");
    return MCRegister();

bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
  int64_t RegLo, RegHi;
  SMLoc FirstIdxLoc = getLoc();
    SecondIdxLoc = getLoc();
    Error(FirstIdxLoc, "invalid register index");
    Error(SecondIdxLoc, "invalid register index");

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");

  if (RegHi == RegLo) {
    StringRef RegSuffix = getTokenStr();
    if (RegSuffix == ".l") {
    } else if (RegSuffix == ".h") {

  Num = static_cast<unsigned>(RegLo);
  RegWidth = 32 * ((RegHi - RegLo) + 1);
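  // Note (added): a quick worked example of the range math above. For the
  // source syntax s[4:7], RegLo = 4 and RegHi = 7, so Num becomes 4 and
  // RegWidth becomes 32 * (7 - 4 + 1) = 128 bits (an SGPR quad).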
MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                            SmallVectorImpl<AsmToken> &Tokens) {
  RegKind = IS_SPECIAL;

MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                            SmallVectorImpl<AsmToken> &Tokens) {
  StringRef RegName = getTokenStr();
  auto Loc = getLoc();
    Error(Loc, "invalid register name");
    return MCRegister();

  unsigned SubReg = NoSubRegister;
  if (!RegSuffix.empty()) {
      Error(Loc, "invalid register index");
      return MCRegister();

  if (!ParseRegRange(RegNum, RegWidth, SubReg))
    return MCRegister();

  return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);

MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
                                         unsigned &RegNum, unsigned &RegWidth,
                                         SmallVectorImpl<AsmToken> &Tokens) {
  auto ListLoc = getLoc();
                 "expected a register or a list of registers")) {
    return MCRegister();

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return MCRegister();
  if (RegWidth != 32) {
    Error(Loc, "expected a single 32-bit register");
    return MCRegister();

    RegisterKind NextRegKind;
    unsigned NextRegNum, NextRegWidth;
    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
      return MCRegister();
    if (NextRegWidth != 32) {
      Error(Loc, "expected a single 32-bit register");
      return MCRegister();
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return MCRegister();
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return MCRegister();
                 "expected a comma or a closing square bracket")) {
    return MCRegister();

  Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
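  // Note (added): ParseRegList handles the bracketed list form of a register
  // group, e.g. "[s8, s9, s10, s11]". Each element must be a single 32-bit
  // register of the same kind with consecutive indices, and the result is the
  // same wide register that the range syntax "s[8:11]" would produce.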
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  // ...
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
  // ...
    Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  // ...
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  // ...
    assert(Parser.hasPendingError());
  // ...
  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    } else {
      Error(Loc, /* ... */ " register not available on this GPU");
    }
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          bool RestoreOnFailure) {
  // ...
  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        // ...
      }
    }
    // ...
  }
  // ...
}
std::optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  // ... (VGPR case)
    return StringRef(".amdgcn.next_free_vgpr");
  // ... (SGPR case)
    return StringRef(".amdgcn.next_free_sgpr");
  // ...
  return std::nullopt;
}

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  // ...
}

bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // ...
  auto SymbolName = getGprCountSymbolName(RegKind);
  // ...
  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
  // ...
  if (/* ... */)
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (/* ... */)
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
  // ...
  if (OldCount <= NewMax)
    // ...
}
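// Editor's sketch (not part of the parser): updateGprCountSymbols keeps the
// .amdgcn.next_free_{v,s}gpr symbol at the highest dword index used so far, using
// NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1 from above. For example a
// 128-bit tuple starting at v4 gives 4 + 4 - 1 = 7. A standalone restatement, assuming
// int64_t is available from the surrounding headers:
static int64_t exampleNextFreeGprCandidate(unsigned DwordRegIndex, unsigned RegWidth) {
  // (RegWidth + 31) / 32 is divideCeil(RegWidth, 32) without the LLVM helper.
  return int64_t(DwordRegIndex) + (RegWidth + 31) / 32 - 1;
}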
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  // ...
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  MCRegister Reg;
  unsigned RegNum, RegWidth;
  // ...
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    // ...
  }
  // ...
  if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
    // ...
  // ...
  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}
// Body of AMDGPUAsmParser::parseImm (signature elided in this excerpt).
  if (isRegister() || isModifier())
    // ...

  if (Lit == LitModifier::None) {
    if (trySkipId("lit"))
      Lit = LitModifier::Lit;
    else if (trySkipId("lit64"))
      Lit = LitModifier::Lit64;
    // ...
    if (Lit != LitModifier::None) {
      // ...
      ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
      // ...
    }
  }

  const auto &NextTok = peekToken();
  // ...
  bool Negate = false;
  // ...
  AMDGPUOperand::Modifiers Mods;
  // ... (floating-point literal case)
    StringRef Num = getTokenStr();
    // ...
    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
      // ...
    RealVal.changeSign();
    // ...
    Operands.push_back(
        AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                                 AMDGPUOperand::ImmTyNone, true));
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
    // ...

  if (HasSP3AbsModifier) {
    // ...
    if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
      // ...
  } else {
    if (Parser.parseExpression(Expr))
      // ...
  }
  // ...
  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
    // ...
    if (Lit != LitModifier::None)
      // ...
  } else {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  }
  // ...

// Body of AMDGPUAsmParser::parseReg (signature elided in this excerpt).
  if (auto R = parseRegister()) {
    // ...
  }
  // ...

// Body of AMDGPUAsmParser::parseRegOrImm (signature elided in this excerpt).
  // ...
  ParseStatus Res = parseReg(Operands);
  // ...
bool AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token,
                                             const AsmToken &NextToken) const {
  // ...
  return str == "abs" || str == "neg" || str == "sext";
}

bool AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token,
                                              const AsmToken &NextToken) const {
  // ...
}

bool AMDGPUAsmParser::isOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}

bool AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token,
                                             const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}

bool AMDGPUAsmParser::isModifier() {
  // ...
  AsmToken NextToken[2];
  peekTokens(NextToken);
  // ...
  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) &&
          isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}
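// Editor's sketch (not part of the parser): the predicates above distinguish operand
// modifiers from ordinary operands using up to two tokens of lookahead, so that a
// leading '-' is treated as a modifier only when a register or another modifier follows
// (e.g. "-v0" or "-|v0|"), not when it merely starts a negative literal such as "-5".
// The named spellings they accept, for illustration only:
static const char *const ExampleNamedOperandModifiers[] = {"abs", "neg", "sext"};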
bool AMDGPUAsmParser::parseSP3NegModifier() {
  // ...
  AsmToken NextToken[2];
  peekTokens(NextToken);
  // ...
  if (/* ... */ &&
      (isRegister(NextToken[0], NextToken[1]) ||
       // ...
       isId(NextToken[0], "abs"))) {
    // ...
  }
  // ...
}

// Body of AMDGPUAsmParser::parseRegOrImmWithFPInputMods (signature elided in this
// excerpt).
  // ...
    return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
  // ...
  SP3Neg = parseSP3NegModifier();
  // ...
  Neg = trySkipId("neg");
  // ...
    return Error(Loc, "expected register or immediate");
  // ...
  Abs = trySkipId("abs");
  // ...
  if (trySkipId("lit")) {
    Lit = LitModifier::Lit;
    // ...
  } else if (trySkipId("lit64")) {
    Lit = LitModifier::Lit64;
    // ...
    if (!has64BitLiterals())
      return Error(Loc, "lit64 is not supported on this GPU");
  }
  // ...
    return Error(Loc, "expected register or immediate");
  // ...
    return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
        // ...
  // ...
  if (Lit != LitModifier::None && !Operands.back()->isImm())
    Error(Loc, "expected immediate with lit modifier");
  // ...
  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    // ...
  if (Lit != LitModifier::None &&
      // ...
  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;
  // ...
  if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    // ...
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
  }
  // ...
// Body of AMDGPUAsmParser::parseRegOrImmWithIntInputMods (signature elided in this
// excerpt).
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    // ...
  // ...
  AMDGPUOperand::Modifiers Mods;
  // ...
  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    // ...
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
  }
  // ...

// The register-only wrappers forward to the generic helpers above with AllowImm = false.
  return parseRegOrImmWithFPInputMods(Operands, false);
// ...
  return parseRegOrImmWithIntInputMods(Operands, false);

// Body of a helper that parses either the literal 'off' or a register operand
// (signature elided in this excerpt).
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));
    // ...
  }
  // ...
  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  // ...
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  // ...
    return Match_InvalidOperand;
  // ...
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // ...
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    // ...
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }
  // ...
  if (tryAnotherVOPDEncoding(Inst))
    return Match_InvalidOperand;

  return Match_Success;
}
// ...
  static const unsigned Variants[] = {
      // ...
  };
// ...

ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (isForcedDPP() && isForcedVOP3()) {
    // ...
  }
  if (getForcedEncodingSize() == 32) {
    // ...
  }
  if (isForcedVOP3()) {
    // ...
  }
  if (isForcedSDWA()) {
    // ...
  }
  if (isForcedDPP()) {
    // ...
  }
  // ...
}

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
    // ...
  if (getForcedEncodingSize() == 32)
    // ...
  // ...
}
unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  // ...
  case AMDGPU::FLAT_SCR:
  // ...
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
    // ...
  // ...
  return AMDGPU::NoRegister;
}

bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  // ...
}
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  // ...
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    // ...
  // ...
}

// Static helper collecting the source-operand indices inspected by the literal and
// constant-bus checks below (signature elided in this excerpt).
/* ... */ bool AddMandatoryLiterals = false) {
  // ...
      AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
  // ...
      AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
  // ... (VOPD case)
    return {getNamedOperandIdx(Opcode, OpName::src0X),
            getNamedOperandIdx(Opcode, OpName::vsrc1X),
            getNamedOperandIdx(Opcode, OpName::vsrc2X),
            getNamedOperandIdx(Opcode, OpName::src0Y),
            getNamedOperandIdx(Opcode, OpName::vsrc1Y),
            getNamedOperandIdx(Opcode, OpName::vsrc2Y),
            // ...
  // ...
  return {getNamedOperandIdx(Opcode, OpName::src0),
          getNamedOperandIdx(Opcode, OpName::src1),
          getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
}
bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  // ...
    return !isInlineConstant(Inst, OpIdx);
  // ...
    return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
  // ...
}

// Helper checking the v_writelane lane-select special case (signature elided in this
// excerpt).
  const unsigned Opcode = Inst.getOpcode();
  if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
    // ...
  if (!LaneSelOp.isReg())
    // ...
  return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
bool AMDGPUAsmParser::validateConstantBusLimitations(
    const MCInst &Inst, const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  MCRegister LastSGPR;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  if (!(Desc.TSFlags &
        /* ... */))
    // ...

  SmallDenseSet<unsigned> SGPRsUsed;
  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed != AMDGPU::NoRegister) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;
  }
  // ...
  unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
  // ...
  for (int OpIdx : OpIndices) {
    // ...
    if (usesConstantBus(Inst, OpIdx)) {
      // ...
      if (SGPRsUsed.insert(LastSGPR).second) {
        ++ConstantBusUseCount;
      }
      // ...
      if (NumLiterals == 0) {
        // ...
      } else if (LiteralSize != Size) {
        // ...
      }
      // ...
    }
  }
  // ...
  if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
    Error(/* ... */,
          "invalid operand (violates constant bus restrictions)");
    // ...
  }
  // ...
}
std::optional<unsigned>
AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
  const unsigned Opcode = Inst.getOpcode();
  // ...
  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
    const MCOperand &Opr = Inst.getOperand(OperandIdx);
    // ...
  };
  // ...
  bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
  // ...
  for (auto OpName : {OpName::src0X, OpName::src0Y}) {
    int I = getNamedOperandIdx(Opcode, OpName);
    // ...
    int64_t Imm = Op.getImm();
    // ...
  }
  // ...
  for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
                      OpName::vsrc2Y, OpName::imm}) {
    int I = getNamedOperandIdx(Opcode, OpName);
    // ...
  }
  // ...
  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
      getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
  // ...
  return InvalidCompOprIdx;
}
bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
                                   const OperandVector &Operands) {
  // ...
  for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
    AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
    if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
        // ...
      Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
    // ...
  }
  // ...
  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
  if (!InvalidCompOprIdx.has_value())
    // ...

  auto CompOprIdx = *InvalidCompOprIdx;
  // ...
      std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
  // ...
  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
    // ...
      Error(Loc, "dst registers must be distinct");
    // ...
      Error(Loc, "one dst register must be even and the other odd");
  } else {
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
    Error(Loc, Twine("src") + Twine(CompSrcIdx) +
                   " operands must use different VGPR banks");
  }
  // ...
}
bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
  // ...
  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
  if (!InvalidCompOprIdx.has_value())
    // ...
  // ...
  InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
  if (InvalidCompOprIdx.has_value()) {
    // ...
    if (*InvalidCompOprIdx == VOPD::Component::DST)
      // ...
  }
  // ...
}

bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  // ...
  for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
                      OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
                      OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
    int I = getNamedOperandIdx(Opcode, OpName);
    // ...
  }
  // ...
  return !tryVOPD3(Inst);
}

bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  // ...
    return tryVOPD(Inst);
  return tryVOPD3(Inst);
}
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
  // ...
  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
  // ...
}
4100bool AMDGPUAsmParser::validateMIMGDataSize(
const MCInst &Inst,
SMLoc IDLoc) {
4108 int VDataIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
4109 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4110 int TFEIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::tfe);
4118 unsigned VDataSize = getRegOperandSize(
Desc, VDataIdx);
4119 unsigned TFESize = (TFEIdx != -1 && Inst.
getOperand(TFEIdx).
getImm()) ? 1 : 0;
4124 bool IsPackedD16 =
false;
4128 int D16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::d16);
4129 IsPackedD16 = D16Idx >= 0;
4134 if ((VDataSize / 4) ==
DataSize + TFESize)
4139 Modifiers = IsPackedD16 ?
"dmask and d16" :
"dmask";
4141 Modifiers = IsPackedD16 ?
"dmask, d16 and tfe" :
"dmask and tfe";
4143 Error(IDLoc,
Twine(
"image data size does not match ") + Modifiers);
4147bool AMDGPUAsmParser::validateMIMGAddrSize(
const MCInst &Inst, SMLoc IDLoc) {
4156 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4158 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
4160 ? AMDGPU::OpName::srsrc
4161 : AMDGPU::OpName::rsrc;
4162 int SrsrcIdx = AMDGPU::getNamedOperandIdx(
Opc, RSrcOpName);
4163 int DimIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dim);
4164 int A16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::a16);
4168 assert(SrsrcIdx > VAddr0Idx);
4171 if (BaseOpcode->
BVH) {
4172 if (IsA16 == BaseOpcode->
A16)
4174 Error(IDLoc,
"image address size does not match a16");
4180 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4181 unsigned ActualAddrSize =
4182 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(
Desc, VAddr0Idx) / 4;
4184 unsigned ExpectedAddrSize =
4188 if (hasPartialNSAEncoding() &&
4191 int VAddrLastIdx = SrsrcIdx - 1;
4192 unsigned VAddrLastSize = getRegOperandSize(
Desc, VAddrLastIdx) / 4;
4194 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4197 if (ExpectedAddrSize > 12)
4198 ExpectedAddrSize = 16;
4203 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4207 if (ActualAddrSize == ExpectedAddrSize)
4210 Error(IDLoc,
"image address size does not match dim and a16");
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
  // ...
  if (!Desc.mayLoad() || !Desc.mayStore())
    // ...
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  // ...
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
}

bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
  // ...
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  // ...
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}
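// Editor's sketch (not part of the parser): the two dmask rules above differ because
// atomic image operations must cover a contiguous run of low components (0x1, 0x3 or
// 0xf), while gather4 returns a single component, so exactly one dmask bit may be set
// (matching the "only one bit must be set" diagnostic later in this file). Restated
// standalone:
static bool exampleValidAtomicImageDMask(unsigned DMask) {
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
}
static bool exampleValidGatherDMask(unsigned DMask) {
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}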
4253bool AMDGPUAsmParser::validateMIMGDim(
const MCInst &Inst,
4268 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
4269 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4276bool AMDGPUAsmParser::validateMIMGMSAA(
const MCInst &Inst) {
4284 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4287 if (!BaseOpcode->
MSAA)
4290 int DimIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dim);
4296 return DimInfo->
MSAA;
4302 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4303 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4304 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4314bool AMDGPUAsmParser::validateMovrels(
const MCInst &Inst,
4323 const int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4326 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4334 Error(getOperandLoc(
Operands, Src0Idx),
"source operand must be a VGPR");
4338bool AMDGPUAsmParser::validateMAIAccWrite(
const MCInst &Inst,
4343 if (
Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4346 const int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4349 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4357 "source operand must be either a VGPR or an inline constant");
4364bool AMDGPUAsmParser::validateMAISrc2(
const MCInst &Inst,
4367 const MCInstrDesc &
Desc = MII.
get(Opcode);
4370 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4373 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4377 if (Inst.
getOperand(Src2Idx).
isImm() && isInlineConstant(Inst, Src2Idx)) {
4379 "inline constants are not allowed for this operand");
4386bool AMDGPUAsmParser::validateMFMA(
const MCInst &Inst,
4394 int BlgpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
4395 if (BlgpIdx != -1) {
4396 if (
const MFMA_F8F6F4_Info *
Info = AMDGPU::isMFMA_F8F6F4(
Opc)) {
4397 int CbszIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::cbsz);
4407 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4409 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4414 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
4416 "wrong register tuple size for blgp value " + Twine(BLGP));
4424 const int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
4428 const MCOperand &Src2 = Inst.
getOperand(Src2Idx);
4432 MCRegister Src2Reg = Src2.
getReg();
4434 if (Src2Reg == DstReg)
4439 .getSizeInBits() <= 128)
4442 if (
TRI->regsOverlap(Src2Reg, DstReg)) {
4444 "source 2 operand must not partially overlap with dst");
4451bool AMDGPUAsmParser::validateDivScale(
const MCInst &Inst) {
4455 case V_DIV_SCALE_F32_gfx6_gfx7:
4456 case V_DIV_SCALE_F32_vi:
4457 case V_DIV_SCALE_F32_gfx10:
4458 case V_DIV_SCALE_F64_gfx6_gfx7:
4459 case V_DIV_SCALE_F64_vi:
4460 case V_DIV_SCALE_F64_gfx10:
4466 for (
auto Name : {AMDGPU::OpName::src0_modifiers,
4467 AMDGPU::OpName::src2_modifiers,
4468 AMDGPU::OpName::src2_modifiers}) {
4479bool AMDGPUAsmParser::validateMIMGD16(
const MCInst &Inst) {
4487 int D16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::d16);
4496bool AMDGPUAsmParser::validateTensorR128(
const MCInst &Inst) {
4503 int R128Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::r128);
4511 case AMDGPU::V_SUBREV_F32_e32:
4512 case AMDGPU::V_SUBREV_F32_e64:
4513 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4514 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4515 case AMDGPU::V_SUBREV_F32_e32_vi:
4516 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4517 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4518 case AMDGPU::V_SUBREV_F32_e64_vi:
4520 case AMDGPU::V_SUBREV_CO_U32_e32:
4521 case AMDGPU::V_SUBREV_CO_U32_e64:
4522 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4523 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4525 case AMDGPU::V_SUBBREV_U32_e32:
4526 case AMDGPU::V_SUBBREV_U32_e64:
4527 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4528 case AMDGPU::V_SUBBREV_U32_e32_vi:
4529 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4530 case AMDGPU::V_SUBBREV_U32_e64_vi:
4532 case AMDGPU::V_SUBREV_U32_e32:
4533 case AMDGPU::V_SUBREV_U32_e64:
4534 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4535 case AMDGPU::V_SUBREV_U32_e32_vi:
4536 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4537 case AMDGPU::V_SUBREV_U32_e64_vi:
4539 case AMDGPU::V_SUBREV_F16_e32:
4540 case AMDGPU::V_SUBREV_F16_e64:
4541 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4542 case AMDGPU::V_SUBREV_F16_e32_vi:
4543 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4544 case AMDGPU::V_SUBREV_F16_e64_vi:
4546 case AMDGPU::V_SUBREV_U16_e32:
4547 case AMDGPU::V_SUBREV_U16_e64:
4548 case AMDGPU::V_SUBREV_U16_e32_vi:
4549 case AMDGPU::V_SUBREV_U16_e64_vi:
4551 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4552 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4553 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4555 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4556 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4558 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4559 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4561 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4562 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4564 case AMDGPU::V_LSHRREV_B32_e32:
4565 case AMDGPU::V_LSHRREV_B32_e64:
4566 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4567 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4568 case AMDGPU::V_LSHRREV_B32_e32_vi:
4569 case AMDGPU::V_LSHRREV_B32_e64_vi:
4570 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4571 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4573 case AMDGPU::V_ASHRREV_I32_e32:
4574 case AMDGPU::V_ASHRREV_I32_e64:
4575 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4576 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4577 case AMDGPU::V_ASHRREV_I32_e32_vi:
4578 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4579 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4580 case AMDGPU::V_ASHRREV_I32_e64_vi:
4582 case AMDGPU::V_LSHLREV_B32_e32:
4583 case AMDGPU::V_LSHLREV_B32_e64:
4584 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4585 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4586 case AMDGPU::V_LSHLREV_B32_e32_vi:
4587 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4588 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4589 case AMDGPU::V_LSHLREV_B32_e64_vi:
4591 case AMDGPU::V_LSHLREV_B16_e32:
4592 case AMDGPU::V_LSHLREV_B16_e64:
4593 case AMDGPU::V_LSHLREV_B16_e32_vi:
4594 case AMDGPU::V_LSHLREV_B16_e64_vi:
4595 case AMDGPU::V_LSHLREV_B16_gfx10:
4597 case AMDGPU::V_LSHRREV_B16_e32:
4598 case AMDGPU::V_LSHRREV_B16_e64:
4599 case AMDGPU::V_LSHRREV_B16_e32_vi:
4600 case AMDGPU::V_LSHRREV_B16_e64_vi:
4601 case AMDGPU::V_LSHRREV_B16_gfx10:
4603 case AMDGPU::V_ASHRREV_I16_e32:
4604 case AMDGPU::V_ASHRREV_I16_e64:
4605 case AMDGPU::V_ASHRREV_I16_e32_vi:
4606 case AMDGPU::V_ASHRREV_I16_e64_vi:
4607 case AMDGPU::V_ASHRREV_I16_gfx10:
4609 case AMDGPU::V_LSHLREV_B64_e64:
4610 case AMDGPU::V_LSHLREV_B64_gfx10:
4611 case AMDGPU::V_LSHLREV_B64_vi:
4613 case AMDGPU::V_LSHRREV_B64_e64:
4614 case AMDGPU::V_LSHRREV_B64_gfx10:
4615 case AMDGPU::V_LSHRREV_B64_vi:
4617 case AMDGPU::V_ASHRREV_I64_e64:
4618 case AMDGPU::V_ASHRREV_I64_gfx10:
4619 case AMDGPU::V_ASHRREV_I64_vi:
4621 case AMDGPU::V_PK_LSHLREV_B16:
4622 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4623 case AMDGPU::V_PK_LSHLREV_B16_vi:
4625 case AMDGPU::V_PK_LSHRREV_B16:
4626 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4627 case AMDGPU::V_PK_LSHRREV_B16_vi:
4628 case AMDGPU::V_PK_ASHRREV_I16:
4629 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4630 case AMDGPU::V_PK_ASHRREV_I16_vi:
4637bool AMDGPUAsmParser::validateLdsDirect(
const MCInst &Inst,
4639 using namespace SIInstrFlags;
4640 const unsigned Opcode = Inst.
getOpcode();
4641 const MCInstrDesc &
Desc = MII.
get(Opcode);
4646 if ((
Desc.TSFlags & Enc) == 0)
4649 for (
auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4650 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4654 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4658 "lds_direct is not supported on this GPU");
4664 "lds_direct cannot be used with this instruction");
4668 if (SrcName != OpName::src0) {
4670 "lds_direct may be used as src0 only");
4680 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
4681 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4682 if (
Op.isFlatOffset())
4683 return Op.getStartLoc();
bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
                                     const OperandVector &Operands) {
  // ...
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  // ...
    return validateFlatOffset(Inst, Operands);
  // ...
    return validateSMEMOffset(Inst, Operands);
  // ...
  if (/* ... */) {
    const unsigned OffsetSize = 24;
    if (!isUIntN(OffsetSize - 1, Op.getImm())) {
      Error(/* ... */,
            Twine("expected a ") + Twine(OffsetSize - 1) +
                "-bit unsigned offset for buffer ops");
      // ...
    }
  } else {
    const unsigned OffsetSize = 16;
    if (!isUIntN(OffsetSize, Op.getImm())) {
      Error(/* ... */,
            Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
      // ...
    }
  }
  // ...
}

bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  // ...
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    Error(/* ... */, "flat offset modifier is not supported on this GPU");
    // ...
  }
  // ...
  bool AllowNegative =
      // ...
  if (!isIntN(OffsetSize, Op.getImm()) ||
      (!AllowNegative && Op.getImm() < 0)) {
    Error(/* ... */,
          Twine("expected a ") +
              (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
                             : Twine(OffsetSize - 1) + "-bit unsigned offset"));
    // ...
  }
  // ...
}
4760 for (
unsigned i = 2, e =
Operands.size(); i != e; ++i) {
4761 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4762 if (
Op.isSMEMOffset() ||
Op.isSMEMOffsetMod())
4763 return Op.getStartLoc();
4768bool AMDGPUAsmParser::validateSMEMOffset(
const MCInst &Inst,
4778 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4794 ?
"expected a 23-bit unsigned offset for buffer ops"
4795 :
isGFX12Plus() ?
"expected a 24-bit signed offset"
4796 : (
isVI() || IsBuffer) ?
"expected a 20-bit unsigned offset"
4797 :
"expected a 21-bit signed offset");
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  const MCInstrDesc &Desc = MII.get(Opcode);
  // ...
  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  // ...
  const int OpIndices[] = { Src0Idx, Src1Idx };
  // ...
  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  // ...
  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      break;
    // ...
    std::optional<int64_t> Imm;
    // ...
    } else if (MO.isExpr()) {
      // ...
    }
    // ...
    if (!Imm.has_value()) {
      // ...
    } else if (!isInlineConstant(Inst, OpIdx)) {
      // ...
      if (NumLiterals == 0 || LiteralValue != Value) {
        // ...
      }
    }
  }
  // ...
  if (NumLiterals + NumExprs <= 1)
    // ...
  Error(/* ... */, "only one unique literal operand is allowed");
  // ...
}
4856bool AMDGPUAsmParser::validateOpSel(
const MCInst &Inst) {
4859 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4869 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4870 if (OpSelIdx != -1) {
4874 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
4875 if (OpSelHiIdx != -1) {
4884 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4894 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4895 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
4896 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4897 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
4899 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4900 const MCOperand &Src1 = Inst.
getOperand(Src1Idx);
4906 auto VerifyOneSGPR = [
OpSel, OpSelHi](
unsigned Index) ->
bool {
4908 return ((OpSel & Mask) == 0) && ((OpSelHi &
Mask) == 0);
4918 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
4919 if (Src2Idx != -1) {
4920 const MCOperand &Src2 = Inst.
getOperand(Src2Idx);
4930bool AMDGPUAsmParser::validateTrue16OpSel(
const MCInst &Inst) {
4931 if (!hasTrue16Insts())
4933 const MCRegisterInfo *
MRI = getMRI();
4935 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4941 if (OpSelOpValue == 0)
4943 unsigned OpCount = 0;
4944 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4945 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4946 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), OpName);
4951 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(
Op.getReg())) {
4953 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4954 if (OpSelOpIsHi != VGPRSuffixIsHi)
4963bool AMDGPUAsmParser::validateNeg(
const MCInst &Inst, AMDGPU::OpName OpName) {
4964 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4977 int NegIdx = AMDGPU::getNamedOperandIdx(
Opc, OpName);
4988 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4989 AMDGPU::OpName::src1_modifiers,
4990 AMDGPU::OpName::src2_modifiers};
4992 for (
unsigned i = 0; i < 3; ++i) {
5002bool AMDGPUAsmParser::validateDPP(
const MCInst &Inst,
5005 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dpp_ctrl);
5006 if (DppCtrlIdx >= 0) {
5013 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl,
Operands);
5014 Error(S,
isGFX12() ?
"DP ALU dpp only supports row_share"
5015 :
"DP ALU dpp only supports row_newbcast");
5020 int Dpp8Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dpp8);
5021 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5024 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
5026 const MCOperand &Src1 = Inst.
getOperand(Src1Idx);
5030 "invalid operand for instruction");
5035 "src1 immediate operand invalid for instruction");
bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
  return (Reg == AMDGPU::VCC && isWave64()) ||
         (Reg == AMDGPU::VCC_LO && isWave32());
}
5051bool AMDGPUAsmParser::validateVOPLiteral(
const MCInst &Inst,
5054 const MCInstrDesc &
Desc = MII.
get(Opcode);
5055 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5057 !HasMandatoryLiteral && !
isVOPD(Opcode))
5062 std::optional<unsigned> LiteralOpIdx;
5065 for (
int OpIdx : OpIndices) {
5075 std::optional<int64_t>
Imm;
5081 bool IsAnotherLiteral =
false;
5082 if (!
Imm.has_value()) {
5084 IsAnotherLiteral =
true;
5085 }
else if (!isInlineConstant(Inst,
OpIdx)) {
5090 HasMandatoryLiteral);
5096 !IsForcedFP64 && (!has64BitLiterals() ||
Desc.getSize() != 4)) {
5098 "invalid operand for instruction");
5102 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5109 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5110 !getFeatureBits()[FeatureVOP3Literal]) {
5112 "literal operands are not supported");
5116 if (LiteralOpIdx && IsAnotherLiteral) {
5118 getOperandLoc(
Operands, *LiteralOpIdx)),
5119 "only one unique literal operand is allowed");
5123 if (IsAnotherLiteral)
5124 LiteralOpIdx =
OpIdx;
5147bool AMDGPUAsmParser::validateAGPRLdSt(
const MCInst &Inst)
const {
5155 ? AMDGPU::OpName::data0
5156 : AMDGPU::OpName::vdata;
5158 const MCRegisterInfo *
MRI = getMRI();
5164 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5168 auto FB = getFeatureBits();
5169 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5170 if (DataAreg < 0 || DstAreg < 0)
5172 return DstAreg == DataAreg;
5175 return DstAreg < 1 && DataAreg < 1;
5178bool AMDGPUAsmParser::validateVGPRAlign(
const MCInst &Inst)
const {
5179 auto FB = getFeatureBits();
5180 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5184 const MCRegisterInfo *
MRI = getMRI();
5187 if (FB[AMDGPU::FeatureGFX90AInsts] &&
Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5190 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5194 case AMDGPU::DS_LOAD_TR6_B96:
5195 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5199 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5200 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5204 int VAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
5205 if (VAddrIdx != -1) {
5207 MCRegister
Sub =
MRI->getSubReg(
Op.getReg(), AMDGPU::sub0);
5208 if ((
Sub - AMDGPU::VGPR0) & 1)
5213 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5214 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5219 const MCRegisterClass &VGPR32 =
MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5220 const MCRegisterClass &AGPR32 =
MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5226 MCRegister
Sub =
MRI->getSubReg(
Op.getReg(), AMDGPU::sub0);
5240 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
5241 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
5243 return Op.getStartLoc();
5248bool AMDGPUAsmParser::validateBLGP(
const MCInst &Inst,
5251 int BlgpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
5254 SMLoc BLGPLoc = getBLGPLoc(
Operands);
5257 bool IsNeg = StringRef(BLGPLoc.
getPointer()).starts_with(
"neg:");
5258 auto FB = getFeatureBits();
5259 bool UsesNeg =
false;
5260 if (FB[AMDGPU::FeatureGFX940Insts]) {
5262 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5263 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5264 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5265 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5270 if (IsNeg == UsesNeg)
5274 UsesNeg ?
"invalid modifier: blgp is not supported"
5275 :
"invalid modifier: neg is not supported");
5280bool AMDGPUAsmParser::validateWaitCnt(
const MCInst &Inst,
5286 if (
Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5287 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5288 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5289 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5292 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::sdst);
5295 if (
Reg == AMDGPU::SGPR_NULL)
5298 Error(getOperandLoc(
Operands, Src0Idx),
"src0 must be null");
5302bool AMDGPUAsmParser::validateDS(
const MCInst &Inst,
5308 return validateGWS(Inst,
Operands);
5313 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::gds);
5318 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS,
Operands);
5319 Error(S,
"gds modifier is not supported on this GPU");
5327bool AMDGPUAsmParser::validateGWS(
const MCInst &Inst,
5329 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5333 if (
Opc != AMDGPU::DS_GWS_INIT_vi &&
Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5334 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5337 const MCRegisterInfo *
MRI = getMRI();
5338 const MCRegisterClass &VGPR32 =
MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5340 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::data0);
5343 auto RegIdx =
Reg - (VGPR32.
contains(
Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5345 Error(getOperandLoc(
Operands, Data0Pos),
"vgpr must be even aligned");
5352bool AMDGPUAsmParser::validateCoherencyBits(
const MCInst &Inst,
5355 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.
getOpcode(),
5356 AMDGPU::OpName::cpol);
5364 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol,
Operands);
5367 Error(S,
"scale_offset is not supported on this GPU");
5370 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol,
Operands);
5373 Error(S,
"nv is not supported on this GPU");
5378 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol,
Operands);
5381 Error(S,
"scale_offset is not supported for this instruction");
5385 return validateTHAndScopeBits(Inst,
Operands, CPol);
5390 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol,
Operands);
5391 Error(S,
"cache policy is not supported for SMRD instructions");
5395 Error(IDLoc,
"invalid cache policy for SMEM instruction");
5404 if (!(TSFlags & AllowSCCModifier)) {
5405 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol,
Operands);
5409 "scc modifier is not supported for this instruction on this GPU");
5420 :
"instruction must use glc");
5425 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol,
Operands);
5428 &CStr.data()[CStr.find(
isGFX940() ?
"sc0" :
"glc")]);
5430 :
"instruction must not use glc");
5438bool AMDGPUAsmParser::validateTHAndScopeBits(
const MCInst &Inst,
5440 const unsigned CPol) {
5444 const unsigned Opcode = Inst.
getOpcode();
5445 const MCInstrDesc &TID = MII.
get(Opcode);
5448 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol,
Operands);
5456 return PrintError(
"instruction must use th:TH_ATOMIC_RETURN");
5464 return PrintError(
"invalid th value for SMEM instruction");
5471 return PrintError(
"scope and th combination is not valid");
5477 return PrintError(
"invalid th value for atomic instructions");
5480 return PrintError(
"invalid th value for store instructions");
5483 return PrintError(
"invalid th value for load instructions");
5489bool AMDGPUAsmParser::validateTFE(
const MCInst &Inst,
5492 if (
Desc.mayStore() &&
5494 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE,
Operands);
5496 Error(Loc,
"TFE modifier has no meaning for store instructions");
5504bool AMDGPUAsmParser::validateSetVgprMSB(
const MCInst &Inst,
5506 if (Inst.
getOpcode() != AMDGPU::S_SET_VGPR_MSB_gfx12)
5510 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::simm16);
5512 SMLoc Loc =
Operands[1]->getStartLoc();
5513 Error(Loc,
"s_set_vgpr_msb accepts values in range [0..255]");
5520bool AMDGPUAsmParser::validateWMMA(
const MCInst &Inst,
5526 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) ->
bool {
5527 int FmtIdx = AMDGPU::getNamedOperandIdx(
Opc, FmtOp);
5531 int SrcIdx = AMDGPU::getNamedOperandIdx(
Opc, SrcOp);
5539 static const char *FmtNames[] = {
"MATRIX_FMT_FP8",
"MATRIX_FMT_BF8",
5540 "MATRIX_FMT_FP6",
"MATRIX_FMT_BF6",
5544 "wrong register tuple size for " + Twine(FmtNames[Fmt]));
5548 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5549 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5552bool AMDGPUAsmParser::validateInstruction(
const MCInst &Inst, SMLoc IDLoc,
5554 if (!validateLdsDirect(Inst,
Operands))
5556 if (!validateTrue16OpSel(Inst)) {
5558 "op_sel operand conflicts with 16-bit operand suffix");
5561 if (!validateSOPLiteral(Inst,
Operands))
5563 if (!validateVOPLiteral(Inst,
Operands)) {
5566 if (!validateConstantBusLimitations(Inst,
Operands)) {
5569 if (!validateVOPD(Inst,
Operands)) {
5572 if (!validateIntClampSupported(Inst)) {
5574 "integer clamping is not supported on this GPU");
5577 if (!validateOpSel(Inst)) {
5579 "invalid op_sel operand");
5582 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5584 "invalid neg_lo operand");
5587 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5589 "invalid neg_hi operand");
5592 if (!validateDPP(Inst,
Operands)) {
5596 if (!validateMIMGD16(Inst)) {
5598 "d16 modifier is not supported on this GPU");
5601 if (!validateMIMGDim(Inst,
Operands)) {
5602 Error(IDLoc,
"missing dim operand");
5605 if (!validateTensorR128(Inst)) {
5607 "instruction must set modifier r128=0");
5610 if (!validateMIMGMSAA(Inst)) {
5612 "invalid dim; must be MSAA type");
5615 if (!validateMIMGDataSize(Inst, IDLoc)) {
5618 if (!validateMIMGAddrSize(Inst, IDLoc))
5620 if (!validateMIMGAtomicDMask(Inst)) {
5622 "invalid atomic image dmask");
5625 if (!validateMIMGGatherDMask(Inst)) {
5627 "invalid image_gather dmask: only one bit must be set");
5630 if (!validateMovrels(Inst,
Operands)) {
5633 if (!validateOffset(Inst,
Operands)) {
5636 if (!validateMAIAccWrite(Inst,
Operands)) {
5639 if (!validateMAISrc2(Inst,
Operands)) {
5642 if (!validateMFMA(Inst,
Operands)) {
5645 if (!validateCoherencyBits(Inst,
Operands, IDLoc)) {
5649 if (!validateAGPRLdSt(Inst)) {
5650 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5651 ?
"invalid register class: data and dst should be all VGPR or AGPR"
5652 :
"invalid register class: agpr loads and stores not supported on this GPU"
5656 if (!validateVGPRAlign(Inst)) {
5658 "invalid register class: vgpr tuples must be 64 bit aligned");
5665 if (!validateBLGP(Inst,
Operands)) {
5669 if (!validateDivScale(Inst)) {
5670 Error(IDLoc,
"ABS not allowed in VOP3B instructions");
5673 if (!validateWaitCnt(Inst,
Operands)) {
5676 if (!validateTFE(Inst,
Operands)) {
5679 if (!validateSetVgprMSB(Inst,
Operands)) {
5682 if (!validateWMMA(Inst,
Operands)) {
5691 unsigned VariantID = 0);
5695 unsigned VariantID);
5697bool AMDGPUAsmParser::isSupportedMnemo(
StringRef Mnemo,
5702bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5703 const FeatureBitset &FBS,
5704 ArrayRef<unsigned> Variants) {
5705 for (
auto Variant : Variants) {
5713bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5715 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5718 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5723 getParser().clearPendingErrors();
5727 StringRef VariantName = getMatchedVariantName();
5728 if (!VariantName.
empty() && isSupportedMnemo(Mnemo, FBS)) {
5731 " variant of this instruction is not supported"));
5735 if (
isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5736 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5738 FeatureBitset FeaturesWS32 = getFeatureBits();
5739 FeaturesWS32.
flip(AMDGPU::FeatureWavefrontSize64)
5740 .
flip(AMDGPU::FeatureWavefrontSize32);
5741 FeatureBitset AvailableFeaturesWS32 =
5742 ComputeAvailableFeatures(FeaturesWS32);
5744 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5745 return Error(IDLoc,
"instruction requires wavesize=32");
5749 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5750 return Error(IDLoc,
"instruction not supported on this GPU");
5755 return Error(IDLoc,
"invalid instruction" + Suggestion);
5761 const auto &
Op = ((AMDGPUOperand &)*
Operands[InvalidOprIdx]);
5762 if (
Op.isToken() && InvalidOprIdx > 1) {
5763 const auto &PrevOp = ((AMDGPUOperand &)*
Operands[InvalidOprIdx - 1]);
5764 return PrevOp.isToken() && PrevOp.getToken() ==
"::";
5769bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc,
unsigned &Opcode,
5772 uint64_t &ErrorInfo,
5773 bool MatchingInlineAsm) {
5776 unsigned Result = Match_Success;
5777 for (
auto Variant : getMatchedVariants()) {
5779 auto R = MatchInstructionImpl(
Operands, Inst, EI, MatchingInlineAsm,
5784 if (R == Match_Success || R == Match_MissingFeature ||
5785 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5786 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5787 Result != Match_MissingFeature)) {
5791 if (R == Match_Success)
5795 if (Result == Match_Success) {
5796 if (!validateInstruction(Inst, IDLoc,
Operands)) {
5804 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5810 case Match_MissingFeature:
5814 return Error(IDLoc,
"operands are not valid for this GPU or mode");
5816 case Match_InvalidOperand: {
5817 SMLoc ErrorLoc = IDLoc;
5818 if (ErrorInfo != ~0ULL) {
5819 if (ErrorInfo >=
Operands.size()) {
5820 return Error(IDLoc,
"too few operands for instruction");
5822 ErrorLoc = ((AMDGPUOperand &)*
Operands[ErrorInfo]).getStartLoc();
5823 if (ErrorLoc == SMLoc())
5827 return Error(ErrorLoc,
"invalid VOPDY instruction");
5829 return Error(ErrorLoc,
"invalid operand for instruction");
5832 case Match_MnemonicFail:
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  // ...
  if (getParser().parseAbsoluteExpression(Tmp)) {
    // ...
  }
  // ...
  Ret = static_cast<uint32_t>(Tmp);
  // ...
}

bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (!getSTI().getTargetTriple().isAMDGCN())
    return TokError("directive only supported for amdgcn architecture");

  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))
    // ...

  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(TargetRange.Start,
                             (Twine(".amdgcn_target directive's target id ") +
                              Twine(TargetIDDirective) +
                              Twine(" does not match the specified target id ") +
                              Twine(getTargetStreamer().getTargetID()->toString()))
                                 .str());
  // ...
}
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  // ...
}

bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, const MCExpr *VCCUsed,
    const MCExpr *FlatScrUsed, bool XNACKUsed,
    std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
    SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
    const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
  // ...
  const MCExpr *NumSGPRs = NextFreeSGPR;
  int64_t EvaluatedSGPRs;
  // ...
  unsigned MaxAddressableNumSGPRs =
      // ...
  if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
      !Features.test(FeatureSGPRInitBug) &&
      static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  const MCExpr *ExtraSGPRs =
      // ...

  if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
      (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
      static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  if (Features.test(FeatureSGPRInitBug))
    // ...

  // ...
  auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
                                unsigned Granule) -> const MCExpr * {
    // ...
    const MCExpr *AlignToGPR =
        // ...
    const MCExpr *DivGPR =
        // ...
    // ...
  };

  VGPRBlocks = GetNumGPRBlocks(
      // ...
  // ...
}
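// Editor's sketch (not part of the parser): GetNumGPRBlocks above performs the usual
// descriptor packing of a register count: round up to the allocation granule, divide by
// the granule, and store the result biased by one, as the GRANULATED_* fields of
// COMPUTE_PGM_RSRC1 expect. Assuming that reading of the elided MCExpr arithmetic, the
// scalar equivalent is:
static long long exampleNumGprBlocks(long long NumGPRs, unsigned Granule) {
  long long Aligned = (NumGPRs + Granule - 1) / Granule * Granule; // align up
  return Aligned / Granule - 1;                                    // biased block count
}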
5937bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5938 if (!getSTI().getTargetTriple().isAMDGCN())
5939 return TokError(
"directive only supported for amdgcn architecture");
5942 return TokError(
"directive only supported for amdhsa OS");
5944 StringRef KernelName;
5945 if (getParser().parseIdentifier(KernelName))
5948 AMDGPU::MCKernelDescriptor KD =
5960 const MCExpr *NextFreeVGPR = ZeroExpr;
5962 const MCExpr *NamedBarCnt = ZeroExpr;
5963 uint64_t SharedVGPRCount = 0;
5964 uint64_t PreloadLength = 0;
5965 uint64_t PreloadOffset = 0;
5967 const MCExpr *NextFreeSGPR = ZeroExpr;
5970 unsigned ImpliedUserSGPRCount = 0;
5974 std::optional<unsigned> ExplicitUserSGPRCount;
5975 const MCExpr *ReserveVCC = OneExpr;
5976 const MCExpr *ReserveFlatScr = OneExpr;
5977 std::optional<bool> EnableWavefrontSize32;
5983 SMRange IDRange = getTok().getLocRange();
5984 if (!parseId(
ID,
"expected .amdhsa_ directive or .end_amdhsa_kernel"))
5987 if (
ID ==
".end_amdhsa_kernel")
5991 return TokError(
".amdhsa_ directives cannot be repeated");
5993 SMLoc ValStart = getLoc();
5994 const MCExpr *ExprVal;
5995 if (getParser().parseExpression(ExprVal))
5997 SMLoc ValEnd = getLoc();
5998 SMRange ValRange = SMRange(ValStart, ValEnd);
6001 uint64_t Val = IVal;
6002 bool EvaluatableExpr;
6003 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6005 return OutOfRangeError(ValRange);
6009#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6010 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6011 return OutOfRangeError(RANGE); \
6012 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6017#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6019 return Error(IDRange.Start, "directive should have resolvable expression", \
6022 if (
ID ==
".amdhsa_group_segment_fixed_size") {
6025 return OutOfRangeError(ValRange);
6027 }
else if (
ID ==
".amdhsa_private_segment_fixed_size") {
6030 return OutOfRangeError(ValRange);
6032 }
else if (
ID ==
".amdhsa_kernarg_size") {
6034 return OutOfRangeError(ValRange);
6036 }
else if (
ID ==
".amdhsa_user_sgpr_count") {
6038 ExplicitUserSGPRCount = Val;
6039 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_buffer") {
6043 "directive is not supported with architected flat scratch",
6046 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6049 ImpliedUserSGPRCount += 4;
6050 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_length") {
6053 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6056 return OutOfRangeError(ValRange);
6060 ImpliedUserSGPRCount += Val;
6061 PreloadLength = Val;
6063 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_offset") {
6066 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6069 return OutOfRangeError(ValRange);
6073 PreloadOffset = Val;
6074 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_ptr") {
6077 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6080 ImpliedUserSGPRCount += 2;
6081 }
else if (
ID ==
".amdhsa_user_sgpr_queue_ptr") {
6084 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6087 ImpliedUserSGPRCount += 2;
6088 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_segment_ptr") {
6091 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6094 ImpliedUserSGPRCount += 2;
6095 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_id") {
6098 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6101 ImpliedUserSGPRCount += 2;
6102 }
else if (
ID ==
".amdhsa_user_sgpr_flat_scratch_init") {
6105 "directive is not supported with architected flat scratch",
6109 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6112 ImpliedUserSGPRCount += 2;
6113 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_size") {
6116 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6119 ImpliedUserSGPRCount += 1;
6120 }
else if (
ID ==
".amdhsa_wavefront_size32") {
6122 if (IVersion.
Major < 10)
6123 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6124 EnableWavefrontSize32 = Val;
6126 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6128 }
else if (
ID ==
".amdhsa_uses_dynamic_stack") {
6130 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6132 }
else if (
ID ==
".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6135 "directive is not supported with architected flat scratch",
6138 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6140 }
else if (
ID ==
".amdhsa_enable_private_segment") {
6144 "directive is not supported without architected flat scratch",
6147 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6149 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_x") {
6151 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6153 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_y") {
6155 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6157 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_z") {
6159 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6161 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_info") {
6163 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6165 }
else if (
ID ==
".amdhsa_system_vgpr_workitem_id") {
6167 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6169 }
else if (
ID ==
".amdhsa_next_free_vgpr") {
6170 VGPRRange = ValRange;
6171 NextFreeVGPR = ExprVal;
6172 }
else if (
ID ==
".amdhsa_next_free_sgpr") {
6173 SGPRRange = ValRange;
6174 NextFreeSGPR = ExprVal;
6175 }
else if (
ID ==
".amdhsa_accum_offset") {
6177 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6178 AccumOffset = ExprVal;
6179 }
else if (
ID ==
".amdhsa_named_barrier_count") {
6181 return Error(IDRange.
Start,
"directive requires gfx1250+", IDRange);
6182 NamedBarCnt = ExprVal;
6183 }
else if (
ID ==
".amdhsa_reserve_vcc") {
6185 return OutOfRangeError(ValRange);
6186 ReserveVCC = ExprVal;
6187 }
else if (
ID ==
".amdhsa_reserve_flat_scratch") {
6188 if (IVersion.
Major < 7)
6189 return Error(IDRange.
Start,
"directive requires gfx7+", IDRange);
6192 "directive is not supported with architected flat scratch",
6195 return OutOfRangeError(ValRange);
6196 ReserveFlatScr = ExprVal;
6197 }
else if (
ID ==
".amdhsa_reserve_xnack_mask") {
6198 if (IVersion.
Major < 8)
6199 return Error(IDRange.
Start,
"directive requires gfx8+", IDRange);
6201 return OutOfRangeError(ValRange);
6202 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6203 return getParser().Error(IDRange.
Start,
".amdhsa_reserve_xnack_mask does not match target id",
6205 }
else if (
ID ==
".amdhsa_float_round_mode_32") {
6207 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6209 }
else if (
ID ==
".amdhsa_float_round_mode_16_64") {
6211 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6213 }
else if (
ID ==
".amdhsa_float_denorm_mode_32") {
6215 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6217 }
else if (
ID ==
".amdhsa_float_denorm_mode_16_64") {
6219 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6221 }
else if (
ID ==
".amdhsa_dx10_clamp") {
6222 if (IVersion.
Major >= 12)
6223 return Error(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
6225 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6227 }
else if (
ID ==
".amdhsa_ieee_mode") {
6228 if (IVersion.
Major >= 12)
6229 return Error(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
6231 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6233 }
else if (
ID ==
".amdhsa_fp16_overflow") {
6234 if (IVersion.
Major < 9)
6235 return Error(IDRange.
Start,
"directive requires gfx9+", IDRange);
6237 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6239 }
else if (
ID ==
".amdhsa_tg_split") {
6241 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6244 }
else if (
ID ==
".amdhsa_workgroup_processor_mode") {
6247 "directive unsupported on " + getSTI().
getCPU(), IDRange);
6249 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6251 }
else if (
ID ==
".amdhsa_memory_ordered") {
6252 if (IVersion.
Major < 10)
6253 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6255 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6257 }
else if (
ID ==
".amdhsa_forward_progress") {
6258 if (IVersion.
Major < 10)
6259 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6261 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6263 }
else if (
ID ==
".amdhsa_shared_vgpr_count") {
6265 if (IVersion.
Major < 10 || IVersion.
Major >= 12)
6266 return Error(IDRange.
Start,
"directive requires gfx10 or gfx11",
6268 SharedVGPRCount = Val;
6270 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6272 }
else if (
ID ==
".amdhsa_inst_pref_size") {
6273 if (IVersion.
Major < 11)
6274 return Error(IDRange.
Start,
"directive requires gfx11+", IDRange);
6275 if (IVersion.
Major == 11) {
6277 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6281 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6284 }
else if (
ID ==
".amdhsa_exception_fp_ieee_invalid_op") {
6287 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6289 }
else if (
ID ==
".amdhsa_exception_fp_denorm_src") {
6291 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6293 }
else if (
ID ==
".amdhsa_exception_fp_ieee_div_zero") {
6296 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6298 }
else if (
ID ==
".amdhsa_exception_fp_ieee_overflow") {
6300 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6302 }
else if (
ID ==
".amdhsa_exception_fp_ieee_underflow") {
6304 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6306 }
else if (
ID ==
".amdhsa_exception_fp_ieee_inexact") {
6308 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6310 }
else if (
ID ==
".amdhsa_exception_int_div_zero") {
6312 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6314 }
else if (
ID ==
".amdhsa_round_robin_scheduling") {
6315 if (IVersion.
Major < 12)
6316 return Error(IDRange.
Start,
"directive requires gfx12+", IDRange);
6318 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6321 return Error(IDRange.
Start,
"unknown .amdhsa_kernel directive", IDRange);
6324#undef PARSE_BITS_ENTRY
  if (!Seen.contains(".amdhsa_next_free_vgpr"))
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (!Seen.contains(".amdhsa_next_free_sgpr"))
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);

  if (PreloadLength) {

  const MCExpr *VGPRBlocks;
  const MCExpr *SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
                         EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,

  int64_t EvaluatedVGPRBlocks;
  bool VGPRBlocksEvaluatable =
      VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
  if (VGPRBlocksEvaluatable &&
      static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
    return OutOfRangeError(VGPRRange);
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());

  int64_t EvaluatedSGPRBlocks;
  if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
      static_cast<uint64_t>(EvaluatedSGPRBlocks)))
    return OutOfRangeError(SGPRRange);
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());

  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError("amdgpu_user_sgpr_count smaller than implied by "
                    "enabled user SGPRs");

    return TokError("too many user SGPRs enabled");
      COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());

    return TokError("too many user SGPRs enabled");
      COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());

    return TokError("Kernarg size should be resolvable");
  uint64_t kernarg_size = IVal;
  if (PreloadLength && kernarg_size &&
      (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
    return TokError("Kernarg preload length + offset is larger than the "
                    "kernarg segment size");

  if (!Seen.contains(".amdhsa_accum_offset"))
    return TokError(".amdhsa_accum_offset directive is required");
  int64_t EvaluatedAccum;
  bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
  uint64_t UEvaluatedAccum = EvaluatedAccum;
  if (AccumEvaluatable &&
      (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
    return TokError("accum_offset should be in range [4..256] in "

  int64_t EvaluatedNumVGPR;
  if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
      alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
    return TokError("accum_offset exceeds total VGPR allocation");
      COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
      COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
      COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
      COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,

  if (IVersion.Major >= 10 && IVersion.Major < 12) {
    if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
      return TokError("shared_vgpr_count directive not valid on "
                      "wavefront size 32");
    if (VGPRBlocksEvaluatable &&
        (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
      return TokError("shared_vgpr_count*2 + "
                      "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "

  getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
                                                 NextFreeVGPR, NextFreeSGPR,
                                                 ReserveVCC, ReserveFlatScr);
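// Illustrative usage (not taken from this file): a minimal .amdhsa_kernel
// block accepted by ParseDirectiveAMDHSAKernel above. Register counts are
// placeholder values, and depending on the target additional directives
// (for example .amdhsa_accum_offset on gfx90a, as checked above) are required.
//
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel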
bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
  if (ParseAsAbsoluteExpression(Version))
  getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               AMDGPUMCKernelCodeT &C) {
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
    return TokError(Err.str());

  if (ID == "enable_wavefront_size32") {
      return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
      return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");

  if (ID == "wavefront_size") {
    if (C.wavefront_size == 5) {
        return TokError("wavefront_size=5 is only allowed on GFX10+");
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (C.wavefront_size == 6) {
        return TokError("wavefront_size=6 requires +WavefrontSize64");
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  AMDGPUMCKernelCodeT KernelCode;
    if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
    if (ID == ".end_amd_kernel_code_t")
    if (ParseAMDKernelCodeTValue(ID, KernelCode))
  getTargetStreamer().EmitAMDKernelCodeT(KernelCode);

bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  StringRef KernelName;
  if (!parseId(KernelName, "expected symbol name"))
  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,

bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (!getSTI().getTargetTriple().isAMDGCN()) {
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
  auto TargetIDDirective = getLexer().getTok().getStringContents();
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return Error(getParser().getTok().getLoc(), "target id must match options");
  getTargetStreamer().EmitISAVersion();

bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  std::string HSAMetadataString;
  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
    return Error(getLoc(), "invalid HSA metadata");
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {
  raw_string_ostream CollectStream(CollectString);
  getLexer().setSkipSpace(false);
  bool FoundEnd = false;
    CollectStream << getTokenStr();
    if (trySkipId(AssemblerDirectiveEnd)) {
    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();
    Parser.eatToEndOfStatement();
  getLexer().setSkipSpace(true);
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));

bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
  auto *PALMetadata = getTargetStreamer().getPALMetadata();
  if (!PALMetadata->setFromString(String))
    return Error(getLoc(), "invalid PAL metadata");

bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
    return Error(getLoc(),
                 "not available on non-amdpal OSes")).str());
  auto *PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
      return TokError(Twine("expected an even number of values in ") +
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
    PALMetadata->setRegister(Key, Value);
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())

  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");

  if (getParser().parseComma())

  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  int64_t Alignment = 4;
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");
  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
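// Illustrative usage of the directive parsed above (symbol name and values
// are arbitrary): an LDS symbol of 512 bytes with 16-byte alignment.
//
//   .amdgpu_lds my_lds_var, 512, 16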
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (IDVal == ".amdhsa_kernel")
    return ParseDirectiveAMDHSAKernel();
  if (IDVal == ".amdhsa_code_object_version")
    return ParseDirectiveAMDHSACodeObjectVersion();
    return ParseDirectiveHSAMetadata();
  if (IDVal == ".amd_kernel_code_t")
    return ParseDirectiveAMDKernelCodeT();
  if (IDVal == ".amdgpu_hsa_kernel")
    return ParseDirectiveAMDGPUHsaKernel();
  if (IDVal == ".amd_amdgpu_isa")
    return ParseDirectiveISAVersion();
    Twine(" directive is "
          "not available on non-amdhsa OSes"))
  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();
  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();
    return ParseDirectivePALMetadataBegin();
    return ParseDirectivePALMetadata();
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
  if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
  if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
    return hasSGPR104_SGPR105();
  case SRC_SHARED_BASE_LO:
  case SRC_SHARED_BASE:
  case SRC_SHARED_LIMIT_LO:
  case SRC_SHARED_LIMIT:
  case SRC_PRIVATE_BASE_LO:
  case SRC_PRIVATE_BASE:
  case SRC_PRIVATE_LIMIT_LO:
  case SRC_PRIVATE_LIMIT:
  case SRC_FLAT_SCRATCH_BASE_LO:
  case SRC_FLAT_SCRATCH_BASE_HI:
    return hasGloballyAddressableScratch();
  case SRC_POPS_EXITING_WAVE_ID:
    return (isVI() || isGFX9()) &&
           getTargetStreamer().getTargetID()->isXnackSupported();
  if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
    return hasSGPR102_SGPR103();
  ParseStatus Res = parseVOPD(Operands);
  Res = MatchOperandParserImpl(Operands, Mnemonic);

  SMLoc LBraceLoc = getLoc();
    auto Loc = getLoc();
      Error(Loc, "expected a register");
    RBraceLoc = getLoc();
                  "expected a comma or a closing square bracket"))
  if (Operands.size() - Prefix > 1) {
                     AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
    Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.consume_back("_e64_dpp")) {
    setForcedEncodingSize(64);
  if (Name.consume_back("_e64")) {
    setForcedEncodingSize(64);
  if (Name.consume_back("_e32")) {
    setForcedEncodingSize(32);
  if (Name.consume_back("_dpp")) {
  if (Name.consume_back("_sdwa")) {
    setForcedSDWA(true);

                               unsigned VariantID);

  Name = parseMnemonicSuffix(Name);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
  bool IsMIMG = Name.starts_with("image_");

  OperandMode Mode = OperandMode_Default;
    Mode = OperandMode_NSA;
    checkUnsupportedInstruction(Name, NameLoc);
    if (!Parser.hasPendingError()) {
          : "not a valid operand.";
      Error(getLoc(), Msg);
ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
  if (!trySkipId(Name))
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));

ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,

ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
    std::function<bool(int64_t &)> ConvertResult) {
  ParseStatus Res = parseIntWithPrefix(Prefix, Value);
  if (ConvertResult && !ConvertResult(Value)) {
    Error(S, "invalid " + StringRef(Prefix) + " value.");
  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));

ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
    bool (*ConvertResult)(int64_t &)) {
  const unsigned MaxSize = 4;
  for (int I = 0; ; ++I) {
    SMLoc Loc = getLoc();
    if (Op != 0 && Op != 1)
      return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
    if (I + 1 == MaxSize)
      return Error(getLoc(), "expected a closing square bracket");
  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
                                           AMDGPUOperand::ImmTy ImmTy) {
  if (trySkipId(Name)) {
  } else if (trySkipId("no", Name)) {
    return Error(S, "r128 modifier is not supported on this GPU");
  if (Name == "a16" && !hasA16())
    return Error(S, "a16 modifier is not supported on this GPU");
  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;
  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
                                      bool &Disabling) const {
  Disabling = Id.consume_front("no");
  return StringSwitch<unsigned>(Id)
  return StringSwitch<unsigned>(Id)

  SMLoc StringLoc = getLoc();
  int64_t CPolVal = 0;
  ResScope = parseScope(Operands, Scope);
  if (trySkipId("nv")) {
  } else if (trySkipId("no", "nv")) {
  if (trySkipId("scale_offset")) {
  } else if (trySkipId("no", "scale_offset")) {
  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
                                              AMDGPUOperand::ImmTyCPol));

  SMLoc OpLoc = getLoc();
  unsigned Enabled = 0, Seen = 0;
  unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
    return Error(S, "dlc modifier is not supported on this GPU");
    return Error(S, "scc modifier is not supported on this GPU");
    return Error(S, "duplicate cache policy modifier");
      AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));

  ParseStatus Res = parseStringOrIntWithPrefix(
      Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},

  ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
  if (Value == "TH_DEFAULT")
  else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
           Value == "TH_LOAD_NT_WB") {
    return Error(StringLoc, "invalid th value");
  } else if (Value.consume_front("TH_ATOMIC_")) {
  } else if (Value.consume_front("TH_LOAD_")) {
  } else if (Value.consume_front("TH_STORE_")) {
    return Error(StringLoc, "invalid th value");
  if (Value == "BYPASS")
  TH |= StringSwitch<int64_t>(Value)
        .Default(0xffffffff);
  TH |= StringSwitch<int64_t>(Value)
        .Default(0xffffffff);
  if (TH == 0xffffffff)
    return Error(StringLoc, "invalid th value");
    AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
    AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
    std::optional<unsigned> InsertAt = std::nullopt) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    const AMDGPUOperand &Op =
        static_cast<const AMDGPUOperand &>(*Operands[Idx]);
    Op.addImmOperands(Inst, 1);
  if (InsertAt.has_value())

ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
  StringLoc = getLoc();

ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
  SMLoc StringLoc = getLoc();
  Value = getTokenStr();
    if (Value == Ids[IntVal])
  if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
    return Error(StringLoc, "invalid " + Twine(Name) + " value");

ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
    AMDGPUOperand::ImmTy Type) {
  ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
  SMLoc Loc = getLoc();
  auto Res = parseIntWithPrefix(Pref, Val);
  if (Val < 0 || Val > MaxVal) {
    Error(Loc, Twine("out of range ", StringRef(Pref)));

                                  AMDGPUOperand::ImmTy ImmTy) {
  const char *Pref = "index_key";
  SMLoc Loc = getLoc();
  auto Res = parseIntWithPrefix(Pref, ImmVal);
  if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
       ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
      (ImmVal < 0 || ImmVal > 1))
    return Error(Loc, Twine("out of range ", StringRef(Pref)));
  if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
    return Error(Loc, Twine("out of range ", StringRef(Pref)));
  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));

  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
                              AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(Operands, Name,
      {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
       "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",

  return tryParseMatrixFMT(Operands, "matrix_a_fmt",
                           AMDGPUOperand::ImmTyMatrixAFMT);
  return tryParseMatrixFMT(Operands, "matrix_b_fmt",
                           AMDGPUOperand::ImmTyMatrixBFMT);

                              AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(
      Operands, Name, {"MATRIX_SCALE_ROW0", "MATRIX_SCALE_ROW1"}, Type);

  return tryParseMatrixScale(Operands, "matrix_a_scale",
                             AMDGPUOperand::ImmTyMatrixAScale);
  return tryParseMatrixScale(Operands, "matrix_b_scale",
                             AMDGPUOperand::ImmTyMatrixBScale);

                              AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(
      {"MATRIX_SCALE_FMT_E8", "MATRIX_SCALE_FMT_E5M3", "MATRIX_SCALE_FMT_E4M3"},

  return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
                                AMDGPUOperand::ImmTyMatrixAScaleFmt);
  return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
                                AMDGPUOperand::ImmTyMatrixBScaleFmt);
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
  if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  if (!tryParseFmt("format", UFMT_MAX, Fmt))
  if (Fmt == UFMT_UNDEF)

bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
                                    StringRef FormatStr,
  using namespace llvm::AMDGPU::MTBUFFormat;
  if (Format != DFMT_UNDEF) {
  if (Format != NFMT_UNDEF) {
  Error(Loc, "unsupported format");

ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
  using namespace llvm::AMDGPU::MTBUFFormat;
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    SMLoc Loc = getLoc();
    if (!parseId(Str, "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
    if (Dfmt == DFMT_UNDEF)
      return Error(Loc, "duplicate numeric format");
    if (Nfmt == NFMT_UNDEF)
      return Error(Loc, "duplicate data format");
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
    if (Ufmt == UFMT_UNDEF)
      return Error(FormatLoc, "unsupported format");

ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
  using namespace llvm::AMDGPU::MTBUFFormat;
  if (Id == UFMT_UNDEF)
    return Error(Loc, "unified format is not supported on this GPU");

ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  SMLoc Loc = getLoc();
    return Error(Loc, "out of range format");

ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  StringRef FormatStr;
  SMLoc Loc = getLoc();
  if (!parseId(FormatStr, "expected a format string"))
  auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
  Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
  return parseNumericFormat(Format);

  using namespace llvm::AMDGPU::MTBUFFormat;
  SMLoc Loc = getLoc();
      AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
  Res = parseSymbolicOrNumericFormat(Format);
  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
  assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
    return Error(getLoc(), "duplicate format");

  parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
  Res = parseIntWithPrefix("inst_offset", Operands,
                           AMDGPUOperand::ImmTyInstOffset);

  parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
  Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);

  parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
  parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
  OptionalImmIndexMap OptionalIdx;
  unsigned OperandIdx[4];
  unsigned EnMask = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      OperandIdx[SrcIdx] = Inst.size();
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
    if (Op.isToken() &&
        (Op.getToken() == "done" || Op.getToken() == "row_en"))
    OptionalIdx[Op.getImmTy()] = i;

  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
  for (auto i = 0; i < SrcIdx; ++i) {
    EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);

  IntVal = encode(ISA, IntVal, CntVal);
  if (CntVal != decode(ISA, IntVal)) {
    IntVal = encode(ISA, IntVal, -1);
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();
  SMLoc ValLoc = getLoc();
  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Error(CntLoc, "invalid counter name " + CntName);
    Error(ValLoc, "too large value for " + CntName);
    Error(getLoc(), "expected a counter name");

    if (!parseCnt(Waitcnt))
  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
  SMLoc FieldLoc = getLoc();
  StringRef FieldName = getTokenStr();
  SMLoc ValueLoc = getLoc();
  if (FieldName == "instid0") {
  } else if (FieldName == "instskip") {
  } else if (FieldName == "instid1") {
    Error(FieldLoc, "invalid field name " + FieldName);
      .Case("VALU_DEP_1", 1)
      .Case("VALU_DEP_2", 2)
      .Case("VALU_DEP_3", 3)
      .Case("VALU_DEP_4", 4)
      .Case("TRANS32_DEP_1", 5)
      .Case("TRANS32_DEP_2", 6)
      .Case("TRANS32_DEP_3", 7)
      .Case("FMA_ACCUM_CYCLE_1", 8)
      .Case("SALU_CYCLE_1", 9)
      .Case("SALU_CYCLE_2", 10)
      .Case("SALU_CYCLE_3", 11)
  Delay |= Value << Shift;

    if (!parseDelay(Delay))
  Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));

bool AMDGPUOperand::isSWaitCnt() const {

bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
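// Illustrative only, using field and value names from the switch above; the
// '|' separator between fields is assumed here and not shown in this fragment.
//
//   s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)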
void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
                                  StringRef DepCtrName) {
    Error(Loc, Twine("invalid counter name ", DepCtrName));
    Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
    Error(Loc, Twine("duplicate counter name ", DepCtrName));
    Error(Loc, Twine("invalid value for ", DepCtrName));

bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
  using namespace llvm::AMDGPU::DepCtr;
  SMLoc DepCtrLoc = getLoc();
  StringRef DepCtrName = getTokenStr();
  unsigned PrevOprMask = UsedOprMask;
  int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
    depCtrError(DepCtrLoc, CntVal, DepCtrName);
    Error(getLoc(), "expected a counter name");
  unsigned CntValMask = PrevOprMask ^ UsedOprMask;
  DepCtr = (DepCtr & ~CntValMask) | CntVal;

  using namespace llvm::AMDGPU::DepCtr;
  SMLoc Loc = getLoc();
  unsigned UsedOprMask = 0;
    if (!parseDepCtr(DepCtr, UsedOprMask))
  Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));

bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
                                            OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;
  HwReg.Loc = getLoc();
    HwReg.IsSymbolic = true;
  } else if (!parseExpr(HwReg.Val, "a register name")) {
  if (!skipToken(AsmToken::Comma,
                 "expected a comma or a closing parenthesis"))
  Width.Loc = getLoc();

  using namespace llvm::AMDGPU::Hwreg;
  SMLoc Loc = getLoc();
  StructuredOpField HwReg("id", "hardware register", HwregId::Width,
  StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
                           HwregOffset::Default);
  struct : StructuredOpField {
    using StructuredOpField::StructuredOpField;
    bool validate(AMDGPUAsmParser &Parser) const override {
      return Error(Parser, "only values from 1 to 32 are legal");
  } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
  ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
  Res = parseHwregFunc(HwReg, Offset, Width);
  if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
  ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
      parseExpr(ImmVal, "a hwreg macro, structured immediate"))
    return Error(Loc, "invalid immediate: only 16-bit values are legal");
      AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
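// Illustrative use of the hwreg(id, offset, size) macro parsed above; the
// register name and the 0/4 bit-field are example values only.
//
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)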
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;
    Msg.IsSymbolic = true;
  } else if (!parseExpr(Msg.Val, "a message name")) {
    Op.IsDefined = true;
        (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
    } else if (!parseExpr(Op.Val, "an operation name")) {
    Stream.IsDefined = true;
    Stream.Loc = getLoc();

AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;
  bool Strict = Msg.IsSymbolic;
    Error(Msg.Loc, "specified message id is not supported on this GPU");
    Error(Msg.Loc, "invalid message id");
    Error(Op.Loc, "message does not support operations");
    Error(Msg.Loc, "missing message operation");
    Error(Op.Loc, "specified operation id is not supported on this GPU");
    Error(Op.Loc, "invalid operation id");
    Error(Stream.Loc, "message operation does not support streams");
    Error(Stream.Loc, "invalid message stream id");

  using namespace llvm::AMDGPU::SendMsg;
  SMLoc Loc = getLoc();
  OperandInfoTy Op(OP_NONE_);
  OperandInfoTy Stream(STREAM_ID_NONE_);
  if (parseSendMsgBody(Msg, Op, Stream) &&
      validateSendMsg(Msg, Op, Stream)) {
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
      return Error(Loc, "invalid immediate: only 16-bit values are legal");
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
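// Illustrative use of the sendmsg(...) macro parsed and validated above; the
// message name is an example and availability varies by target.
//
//   s_sendmsg sendmsg(MSG_INTERRUPT)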
  int Slot = StringSwitch<int>(Str)
    return Error(S, "invalid interpolation slot");
  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));

  if (!Str.starts_with("attr"))
    return Error(S, "invalid interpolation attribute");
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    return Error(S, "invalid or missing interpolation attribute channel");
  Str = Str.drop_back(2).drop_front(4);
  if (Str.getAsInteger(10, Attr))
    return Error(S, "invalid or missing interpolation attribute number");
    return Error(S, "out of bounds interpolation attribute number");
  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(
      this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));

  using namespace llvm::AMDGPU::Exp;
    return Error(S, (Id == ET_INVALID)
                        ? "invalid exp target"
                        : "exp target is not supported on this GPU");
  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
                                              AMDGPUOperand::ImmTyExpTgt));
AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {

AMDGPUAsmParser::isId(const StringRef Id) const {

  return getTokenKind() == Kind;

StringRef AMDGPUAsmParser::getId() const {

AMDGPUAsmParser::trySkipId(const StringRef Id) {

AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
  StringRef Tok = getTokenStr();

  if (isId(Id) && peekToken().is(Kind)) {
  if (isToken(Kind)) {

                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);

AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
  if (Parser.parseExpression(Expr))
  if (Expr->evaluateAsAbsolute(Imm))
  if (Expected.empty()) {
    Error(S, "expected absolute expression");
    Error(S, Twine("expected ", Expected) +
             Twine(" or an absolute expression"));

  if (Parser.parseExpression(Expr))
  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));

AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
    Val = getToken().getStringContents();
    Error(getLoc(), ErrMsg);

AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
    Val = getTokenStr();
    if (!ErrMsg.empty())
      Error(getLoc(), ErrMsg);

AMDGPUAsmParser::getToken() const {
  return Parser.getTok();

AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
      : getLexer().peekTok(ShouldSkipSpace);

  auto TokCount = getLexer().peekTokens(Tokens);
  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)

AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();

AMDGPUAsmParser::getLoc() const {

AMDGPUAsmParser::getTokenStr() const {

AMDGPUAsmParser::lex() {

  return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {

                               int MCOpIdx) const {
    const auto TargetOp = static_cast<AMDGPUOperand &>(*Op);
    if (TargetOp.getMCOpIdx() == MCOpIdx)
      return TargetOp.getStartLoc();

AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
  for (unsigned i = Operands.size() - 1; i > 0; --i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      return Op.getStartLoc();

AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
  auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };

  StringRef Id = getTokenStr();
  SMLoc IdLoc = getLoc();
      find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
  if (I == Fields.end())
    return Error(IdLoc, "unknown field");
  if ((*I)->IsDefined)
    return Error(IdLoc, "duplicate field");
  (*I)->Loc = getLoc();
  (*I)->IsDefined = true;

bool AMDGPUAsmParser::validateStructuredOpFields(
  return all_of(Fields, [this](const StructuredOpField *F) {
    return F->validate(*this);

                       const unsigned OrMask,
                       const unsigned XorMask) {
bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
                                          const unsigned MaxVal,
                                          const Twine &ErrMsg, SMLoc &Loc) {

AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))

AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {

AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;
  if (!parseSwizzleOperand(GroupSize,
                           "group size must be in the interval [2,32]",
    Error(Loc, "group size must be a power of two");
  if (parseSwizzleOperand(LaneIdx,
                          "lane id must be in the interval [0,group size - 1]",

AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;
  if (!parseSwizzleOperand(GroupSize,
                           "group size must be in the interval [2,32]",
    Error(Loc, "group size must be a power of two");

AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;
  if (!parseSwizzleOperand(GroupSize,
                           "group size must be in the interval [1,16]",
    Error(Loc, "group size must be a power of two");

AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;
  for (size_t i = 0; i < Ctl.size(); ++i) {
      Error(StrLoc, "invalid mask");

bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;
    Error(getLoc(), "FFT mode swizzle not supported on this GPU");
  if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
                           "FFT swizzle must be in the interval [0," +
                               Twine(FFT_SWIZZLE_MAX) + Twine(']'),

bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;
    Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
  if (!parseSwizzleOperand(Direction, 0, 1,
                           "direction must be 0 (left) or 1 (right)", Loc))
  if (!parseSwizzleOperand(
          RotateSize, 0, ROTATE_MAX_SIZE,
          "number of threads to rotate must be in the interval [0," +
              Twine(ROTATE_MAX_SIZE) + Twine(']'),
        (RotateSize << ROTATE_SIZE_SHIFT);

AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
  SMLoc OffsetLoc = getLoc();
    Error(OffsetLoc, "expected a 16-bit offset");

AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;
  SMLoc ModeLoc = getLoc();
  if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
    Ok = parseSwizzleQuadPerm(Imm);
  } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
    Ok = parseSwizzleBitmaskPerm(Imm);
  } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
    Ok = parseSwizzleBroadcast(Imm);
  } else if (trySkipId(IdSymbolic[ID_SWAP])) {
    Ok = parseSwizzleSwap(Imm);
  } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
    Ok = parseSwizzleReverse(Imm);
  } else if (trySkipId(IdSymbolic[ID_FFT])) {
    Ok = parseSwizzleFFT(Imm);
  } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
    Ok = parseSwizzleRotate(Imm);
    Error(ModeLoc, "expected a swizzle mode");
  return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");

  if (trySkipId("offset")) {
    if (trySkipId("swizzle")) {
      Ok = parseSwizzleMacro(Imm);
      Ok = parseSwizzleOffset(Imm);
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
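// Illustrative swizzle macro usage as parsed above; SWAP with a power-of-two
// group size of 16 swaps neighbouring groups of 16 lanes. Registers are
// placeholders.
//
//   ds_swizzle_b32 v5, v1 offset:swizzle(SWAP, 16)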
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
  using namespace llvm::AMDGPU::VGPRIndexMode;
  for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
    if (trySkipId(IdSymbolic[ModeId])) {
        "expected a VGPR index mode or a closing parenthesis" :
        "expected a VGPR index mode");
      Error(S, "duplicate VGPR index mode");
                 "expected a comma or a closing parenthesis"))

  using namespace llvm::AMDGPU::VGPRIndexMode;
    Imm = parseGPRIdxMacro();
    if (getParser().parseAbsoluteExpression(Imm))
      return Error(S, "invalid immediate: only 4-bit values are legal");
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));

bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);

  if (isRegister() || isModifier())
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      Op.addRegOperands(Inst, 1);
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
    OptionalIdx[Op.getImmTy()] = i;

bool AMDGPUOperand::isSMRDOffset8() const {

bool AMDGPUOperand::isSMEMOffset() const {
  return isImmLiteral();

bool AMDGPUOperand::isSMRDLiteralOffset() const {

bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0 || BoundCtrl == 1) {

void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(),
                                           getSTI().getFeatureString());
  getTargetStreamer().EmitDirectiveAMDGCNTarget();
bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
  StringRef TokenId = getTokenStr();
  AGVK VK = StringSwitch<AGVK>(TokenId)
                .Case("max", AGVK::AGVK_Max)
                .Case("or", AGVK::AGVK_Or)
                .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
                .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
                .Case("alignto", AGVK::AGVK_AlignTo)
                .Case("occupancy", AGVK::AGVK_Occupancy)
                .Default(AGVK::AGVK_None);
  uint64_t CommaCount = 0;
    if (Exprs.empty()) {
               "empty " + Twine(TokenId) + " expression");
    if (CommaCount + 1 != Exprs.size()) {
               "mismatch of commas in " + Twine(TokenId) + " expression");
    if (getParser().parseExpression(Expr, EndLoc))
    if (LastTokenWasComma)
               "unexpected token in " + Twine(TokenId) + " expression");
  return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);

  StringRef Name = getTokenStr();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
  int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
  if (DstOp.isReg() &&
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
    if ((OpSel & (1 << SrcNum)) != 0)

void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
                                   OptionalImmIndexMap &OptionalIdx) {
  cvtVOP3P(Inst, Operands, OptionalIdx);

      && Desc.NumOperands > (OpNum + 1)
      && Desc.operands()[OpNum + 1].RegClass != -1
      && Desc.getOperandConstraint(OpNum + 1,

void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
  constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                    AMDGPU::OpName::src2};
  constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                       AMDGPU::OpName::src1_modifiers,
                                       AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if ((OpSel & (1 << J)) != 0)
    if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)

  OptionalImmIndexMap OptionalIdx;
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
               Op.isInterpAttrChan()) {
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
                        AMDGPUOperand::ImmTyHigh);
                        AMDGPUOperand::ImmTyClamp);
                        AMDGPUOperand::ImmTyOModSI);
                        AMDGPUOperand::ImmTyOpSel);
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  cvtOpSelHelper(Inst, OpSel);

  OptionalImmIndexMap OptionalIdx;
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  cvtOpSelHelper(Inst, OpSel);
void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
  OptionalImmIndexMap OptionalIdx;
  int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
    static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);

    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
    if (NumOperands == CbszOpIdx) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
      Op.addRegOrImmOperands(Inst, 1);

  auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
  if (CbszIdx != OptionalIdx.end()) {
    int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
  int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
  if (BlgpIdx != OptionalIdx.end()) {
    int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();

  auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
  if (OpselIdx != OptionalIdx.end()) {
    OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
  unsigned OpSelHi = 0;
  auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
  if (OpselHiIdx != OptionalIdx.end()) {
    OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers};
  for (unsigned J = 0; J < 2; ++J) {
    unsigned ModVal = 0;
    if (OpSel & (1 << J))
    if (OpSelHi & (1 << J))
    const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

                             OptionalImmIndexMap &OptionalIdx) {
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
      Op.addRegOrImmOperands(Inst, 1);
                        AMDGPUOperand::ImmTyScaleSel);
                        AMDGPUOperand::ImmTyClamp);
                        AMDGPUOperand::ImmTyByteSel);
                        AMDGPUOperand::ImmTyOModSI);
    auto *it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));

  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
                            OptionalImmIndexMap &OptIdx) {
  if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
      Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {

      !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {

  int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
  if (BitOp3Idx != -1) {
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {

      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
  if (MatrixAFMTIdx != -1) {
                        AMDGPUOperand::ImmTyMatrixAFMT, 0);
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
  if (MatrixBFMTIdx != -1) {
                        AMDGPUOperand::ImmTyMatrixBFMT, 0);

  int MatrixAScaleIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
  if (MatrixAScaleIdx != -1) {
                        AMDGPUOperand::ImmTyMatrixAScale, 0);
  int MatrixBScaleIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
  if (MatrixBScaleIdx != -1) {
                        AMDGPUOperand::ImmTyMatrixBScale, 0);

  int MatrixAScaleFmtIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
  if (MatrixAScaleFmtIdx != -1) {
                        AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
  int MatrixBScaleFmtIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
  if (MatrixBScaleFmtIdx != -1) {
                        AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);

                        AMDGPUOperand::ImmTyMatrixAReuse, 0);
                        AMDGPUOperand::ImmTyMatrixBReuse, 0);

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);

  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};
  unsigned OpSelHi = 0;
  if (OpSelHiIdx != -1)

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    uint32_t ModVal = 0;
    if (SrcOp.isReg() && getMRI()
    if ((OpSel & (1 << J)) != 0)
    if ((OpSelHi & (1 << J)) != 0)
    if ((NegLo & (1 << J)) != 0)
    if ((NegHi & (1 << J)) != 0)

  OptionalImmIndexMap OptIdx;

                                  unsigned i, unsigned Opc,
  if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
    ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
    ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);

  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);

  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
  ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1);

  OptionalImmIndexMap OptIdx;
  for (unsigned i = 5; i < Operands.size(); ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    OptIdx[Op.getImmTy()] = i;
                      AMDGPUOperand::ImmTyIndexKey8bit);
                      AMDGPUOperand::ImmTyIndexKey16bit);
                      AMDGPUOperand::ImmTyIndexKey32bit);

  Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
  SMLoc OpYLoc = getLoc();
  Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
  return Error(OpYLoc, "expected a VOPDY instruction after ::");

  auto addOp = [&](uint16_t ParsedOprIdx) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      Op.addRegOperands(Inst, 1);
      Op.addImmOperands(Inst, 1);

  addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
    const auto &CInfo = InstInfo[CompIdx];
    auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
    for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
      addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
    if (CInfo.hasSrc2Acc())
      addOp(CInfo.getIndexOfDstInParsedOperands());

      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
  if (BitOp3Idx != -1) {
    OptionalImmIndexMap OptIdx;
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;
  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
         (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
         (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
         (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
         (Imm == DppCtrl::WAVE_SHL1) ||
         (Imm == DppCtrl::WAVE_ROL1) ||
         (Imm == DppCtrl::WAVE_SHR1) ||
         (Imm == DppCtrl::WAVE_ROR1) ||
         (Imm == DppCtrl::ROW_MIRROR) ||
         (Imm == DppCtrl::ROW_HALF_MIRROR) ||
         (Imm == DppCtrl::BCAST15) ||
         (Imm == DppCtrl::BCAST31) ||
         (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
         (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);

bool AMDGPUOperand::isBLGP() const {

bool AMDGPUOperand::isS16Imm() const {

bool AMDGPUOperand::isU16Imm() const {
bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  SMLoc Loc = getToken().getEndLoc();
  Token = std::string(getTokenStr());
  if (getLoc() != Loc)
    if (!parseId(Suffix))
  StringRef DimId = Token;

  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding))
    return Error(Loc, "invalid dim value");
  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
  for (size_t i = 0; i < 8; ++i) {
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
    if (0 > Sels[i] || 7 < Sels[i])
      return Error(Loc, "expected a 3-bit value");
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
  if (Ctrl == "row_newbcast")
  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||

AMDGPUAsmParser::parseDPPCtrlPerm() {
  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
  for (int i = 0; i < 4; ++i) {
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
    Val += (Temp << i * 2);

AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;
  SMLoc Loc = getLoc();
  if (getParser().parseAbsoluteExpression(Val))
  struct DppCtrlCheck {
  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
      .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
      .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
      .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
      .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
      .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
      .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
      .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
      .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
      .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
      .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));

  using namespace AMDGPU::DPP;
      !isSupportedDPPCtrl(getTokenStr(), Operands))
  SMLoc S = getLoc();
  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
    if (Ctrl == "quad_perm") {
      Val = parseDPPCtrlPerm();
      Val = parseDPPCtrlSel(Ctrl);
      AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  OptionalImmIndexMap OptionalIdx;
  int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
  bool IsVOP3CvtSrDpp =
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;

    if (OldIdx == NumOperands) {
      constexpr int DST_IDX = 0;
    } else if (Src2ModIdx == NumOperands) {
    if (IsVOP3CvtSrDpp) {
    if (TiedTo != -1) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (IsDPP8 && Op.isDppFI()) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImm() &&
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      OptionalIdx[Op.getImmTy()] = I;
                        AMDGPUOperand::ImmTyClamp);
                        AMDGPUOperand::ImmTyByteSel);
    cvtVOP3P(Inst, Operands, OptionalIdx);
    cvtVOP3OpSel(Inst, Operands, OptionalIdx);

  using namespace llvm::AMDGPU::DPP;
                        AMDGPUOperand::ImmTyDppFI);

  OptionalImmIndexMap OptionalIdx;
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
    if (TiedTo != -1) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      Op.addImmOperands(Inst, 1);
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDppFI()) {
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      OptionalIdx[Op.getImmTy()] = I;
  using namespace llvm::AMDGPU::DPP;
                        AMDGPUOperand::ImmTyDppFI);

                              AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(
      {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},

  return parseStringOrIntWithPrefix(
      Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
      AMDGPUOperand::ImmTySDWADstUnused);

                            uint64_t BasicInstType,
  using namespace llvm::AMDGPU::SDWA;
  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      OptionalIdx[Op.getImmTy()] = I;
    SkippedVcc = false;

  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    switch (BasicInstType) {
                          AMDGPUOperand::ImmTyClamp, 0);
                          AMDGPUOperand::ImmTyOModSI, 0);
                          AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
                          AMDGPUOperand::ImmTySDWADstUnused,
                          DstUnused::UNUSED_PRESERVE);
                          AMDGPUOperand::ImmTyClamp, 0);
                          AMDGPUOperand::ImmTyClamp, 0);
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto *it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10426#define GET_MATCHER_IMPLEMENTATION
10427#define GET_MNEMONIC_SPELL_CHECKER
10428#define GET_MNEMONIC_CHECKER
10429#include "AMDGPUGenAsmMatcher.inc"
  // ...
    return parseTokenOp("addr64", Operands);
  // ...
    return parseTokenOp("done", Operands);
  // ...
    return parseTokenOp("idxen", Operands);
  // ...
    return parseTokenOp("lds", Operands);
  // ...
    return parseTokenOp("offen", Operands);
  // ...
    return parseTokenOp("off", Operands);
  case MCK_row_95_en:
    return parseTokenOp("row_en", Operands);
  // ...
    return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
  // ...
    return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
  // ...
  return tryCustomParseOperand(Operands, MCK);
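// This switch maps the generated custom operand classes (MCK_*) to the
// hand-written parsers: bare single-token operands such as addr64, idxen,
// offen, lds, off and row_en go through parseTokenOp, named bits such as gds
// and tfe go through parseNamedBit, and everything else falls back to
// tryCustomParseOperand. The MCK_* enumerators come from
// AMDGPUGenAsmMatcher.inc; characters that are not valid in C++ identifiers
// are escaped numerically, e.g. MCK_row_95_en for the "row_en" token.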
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
  // ...
  AMDGPUOperand &Operand = (AMDGPUOperand &)Op;
  // ...
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  // ...
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  // ...
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  // ...
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  // ...
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  // ...
    return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
  // ...
    return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
  // ...
    return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
  case MCK_SOPPBrTarget:
    return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttrChan:
    return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64_XEXEC:
  // ...
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  // ...
  return Match_InvalidOperand;
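// validateTargetOperandClass is the MCTargetAsmParser hook the generated
// matcher falls back to when its own operand-class check fails; returning
// Match_Success here lets the target accept operand kinds (addr64, gds,
// interp attributes, null, ...) that the auto-derived predicates cannot
// classify, while Match_InvalidOperand preserves the generated diagnostics.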
  // ...
  SMLoc S = getLoc();
  // ...
    return Error(S, "expected a 16-bit value");
  // ...
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  // ...

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

// ...

bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
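// The endpgm operand parser above accepts the optional 16-bit immediate that
// may follow s_endpgm; anything outside that range triggers the
// "expected a 16-bit value" error. In assembly this looks roughly like
// (illustrative example):
//
//   s_endpgm 3
//
// isSplitBarrier, by contrast, accepts any 32-bit inlinable immediate.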