enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
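// AMDGPUOperand represents one parsed machine operand (token, immediate,
// register, or expression) plus its source range and a back-pointer to the
// parser that created it.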
  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;
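  // Modifiers track source-operand decorations: the FP modifiers correspond to
  // syntax such as "-v0" and "|v0|" (neg/abs), the integer modifier to
  // "sext(v0)". The two families are mutually exclusive on a given operand.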
    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers()) &&
             "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers())
        return getFPModifiersOperand();
      if (hasIntModifiers())
        return getIntModifiersOperand();
      return 0;
    }
    ImmTyMatrixAScaleFmt,
    ImmTyMatrixBScaleFmt,

    ImmKindTyMandatoryLiteral,

    mutable ImmKindTy Kind;
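  // The predicates below are queried by the TableGen-generated matcher
  // (AMDGPUGenAsmMatcher.inc) to decide whether this operand fits a given
  // instruction operand class.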
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override { return Kind == Immediate; }

  void setImmKindNone() const { Imm.Kind = ImmKindTyNone; }
  void setImmKindLiteral() const { Imm.Kind = ImmKindTyLiteral; }
  void setImmKindMandatoryLiteral() const { Imm.Kind = ImmKindTyMandatoryLiteral; }
  void setImmKindConst() const { Imm.Kind = ImmKindTyConst; }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;
  bool isRegKind() const { return Kind == Register; }

  bool isReg() const override { return isRegKind() && !hasModifiers(); }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }
  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrInlineImmWithFP64InputMods() const {
    return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
  }
  bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }

  bool isVRegWithFP32InputMods() const {
    return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
  }

  bool isVRegWithFP64InputMods() const {
    return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
  }

  bool isPackedFP16InputMods() const {

  bool isPackedVGPRFP32InputMods() const {

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const { return isRegClass(AMDGPU::VGPR_32RegClassID); }

  bool isVReg32OrOff() const { return isOff() || isVReg32(); }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }
  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;
  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }
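  // One predicate per named immediate modifier; the generated matcher uses
  // these to recognize optional operands such as offen, idxen, gds, or clamp.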
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
  bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
  bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
  bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
  bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
  bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
  bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
  bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
  bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
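  // Source-operand classes follow a naming scheme: "SCSrc" accepts SGPRs or
  // inline constants, "SSrc" additionally accepts literals, "VCSrc" accepts
  // VGPRs/SGPRs or inline constants, "VSrc" additionally accepts literals,
  // while "VISrc"/"AISrc" accept VGPRs/AGPRs or inline constants only.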
  bool isRegOrImm() const { return isReg() || isImm(); }

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const { return isSCSrcB16(); }

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;
  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const { return isSCSrcF16(); }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
  bool isSSrcV2B16() const {

  bool isSSrc_b64() const {
    return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
           (AsmParser
                ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
            isExpr());
  }
  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {

  bool isSSrcV2FP32() const {

  bool isSCSrcV2FP32() const {

  bool isSSrcV2INT32() const {

  bool isSCSrcV2INT32() const {
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }
  bool isVCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }
  bool isVCSrcT_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }
  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }
  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }
  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }
  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }
  bool isVCSrc_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }
  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }
  bool isVCSrcT_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }
  bool isVCSrcT_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }
  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }
  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }
  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }
  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }
  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }
  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }
  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }
  bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
  bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }
  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }
  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
  bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
  bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }
  bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
  bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }
  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }
  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }
  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }
  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }
  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
  bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }
  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }
  bool isVISrcV2B16() const { return isVISrcB16() || isVISrcB32(); }
  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }
  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }
  bool isVISrcV2F16() const { return isVISrcF16() || isVISrcB32(); }
  bool isVISrc_64_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
  }
  bool isVISrc_64_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }
  bool isVISrc_64_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }
  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }
  bool isVISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }
  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }
  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }
  bool isVISrc_256_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }
  bool isVISrc_256_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }
  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }
  bool isVISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }
  bool isVISrc_512_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
  }
  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }
  bool isVISrc_128V2B16() const { return isVISrc_128B16(); }
  bool isVISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }
  bool isVISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }
  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }
  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }
  bool isVISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }
  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }
  bool isVISrc_512V2B16() const { return isVISrc_512B16(); }
  bool isVISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }
  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }
  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512_b32();
  }
  bool isVISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }
  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }
  bool isVISrc_1024V2B16() const { return isVISrc_1024B16(); }
  bool isVISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }
  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }
  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024_b32();
  }
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }
  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }
  bool isAISrcV2B16() const { return isAISrcB16() || isAISrcB32(); }
  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }
  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }
  bool isAISrcV2F16() const { return isAISrcF16() || isAISrcB32(); }
  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }
  bool isAISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }
  bool isAISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }
  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }
  bool isAISrc_128V2B16() const { return isAISrc_128B16(); }
  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }
  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }
  bool isAISrc_128V2F16() const { return isAISrc_128F16() || isAISrc_128_b32(); }
  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }
  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }
  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }
  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }
  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }
  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }
  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }
  bool isAISrc_512V2B16() const { return isAISrc_512B16(); }
  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }
  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }
  bool isAISrc_512V2F16() const { return isAISrc_512F16() || isAISrc_512_b32(); }
  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }
  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }
  bool isAISrc_1024V2B16() const { return isAISrc_1024B16(); }
  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }
  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }
  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }
  bool isKImmFP32() const { return isLiteralImm(MVT::f32); }

  bool isKImmFP16() const { return isLiteralImm(MVT::f16); }

  bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
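  // KImm operands match bare literal constants for instructions that carry a
  // mandatory literal (e.g. v_madmk_f32 / v_madak_f32).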
  bool isMem() const override { return false; }

  bool isExpr() const { return Kind == Expression; }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [this, P]() { return P(*this); };
  }
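  // getPredicate() binds a member predicate to this operand as a nullary
  // callable, which is convenient when scanning operand lists.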
  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  SMLoc getStartLoc() const override { return StartLoc; }

  SMLoc getEndLoc() const override { return EndLoc; }

  SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }
  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const { return getModifiers().hasModifiers(); }

  bool hasFPModifiers() const { return getModifiers().hasFPModifiers(); }

  bool hasIntModifiers() const { return getModifiers().hasIntModifiers(); }
  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N, false);
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }
  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
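  // printImmTy() maps each immediate type to a readable name for debug dumps.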
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyIndexKey32bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyBitOp3: OS << "BitOp3"; break;
    case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
    case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
    case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
    case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
    case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
    case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
    case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
    case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
    case ImmTyScaleSel: OS << "ScaleSel"; break;
    case ImmTyByteSel: OS << "ByteSel"; break;
       << " mods: " << Reg.Mods << '>';

    if (getImmTy() != ImmTyNone) {
      OS << " type: "; printImmTy(OS, getImmTy());
    }
    OS << " mods: " << Imm.Mods << '>';

    OS << '\'' << getToken() << '\'';
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }
  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      MCRegister Reg, SMLoc S, SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = Reg;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }
  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;

class KernelScopeInfo {
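  // Tracks the first unused SGPR/VGPR/AGPR index seen in the current kernel so
  // that register usage can be recorded when the kernel scope closes.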
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
                                           VgprIndexUnusedMin);

  void usesAgprAt(int i) {
    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
                                           VgprIndexUnusedMin);

  KernelScopeInfo() = default;

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    usesAgprAt(AgprIndexUnusedMin = -1);

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"
  void createConstantSymbol(StringRef Id, int64_t Val);

  bool ParseAsAbsoluteExpression(uint32_t &Ret);

                          const MCExpr *FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,

  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveAMDKernelCodeT();
  bool ParseDirectiveAMDGPUHsaKernel();
  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
  MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
  MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
  MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
  bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
  MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,

  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,

    OperandMode_Default,

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

    if (getFeatureBits().none()) {

    if (!FB[AMDGPU::FeatureWavefrontSize64] &&
        !FB[AMDGPU::FeatureWavefrontSize32]) {

      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);

      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);

      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);

      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool has64BitLiterals() const {
    return getFeatureBits()[AMDGPU::Feature64BitLiterals];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasTrue16Insts() const {
    return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  bool hasGloballyAddressableScratch() const {
    return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
  }
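  // Explicitly forced encodings (e.g. an _e64, _dpp, or _sdwa mnemonic suffix)
  // are recorded here and drive matcher variant selection.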
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }

  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
                     bool RestoreOnFailure);
                     SMLoc &EndLoc) override;
                     unsigned Kind) override;
                     bool MatchingInlineAsm) override;
                     OperandMode Mode = OperandMode_Default);

  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      std::function<bool(int64_t &)> ConvertResult = nullptr);
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      bool (*ConvertResult)(int64_t &) = nullptr);
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
      AMDGPUOperand::ImmTy Type);

  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token,
                              const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token,
                              const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token,
                               const AsmToken &NextToken) const;
  bool parseSP3NegModifier();

                bool HasLit = false, bool HasLit64 = false);
                bool HasLit = false, bool HasLit64 = false);
                bool AllowImm = true);
                bool AllowImm = true);
                AMDGPUOperand::ImmTy ImmTy);
                AMDGPUOperand::ImmTy Type);
                AMDGPUOperand::ImmTy Type);
                AMDGPUOperand::ImmTy Type);

  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);

  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr,
                     SMLoc Loc);

  bool parseCnt(int64_t &IntVal);
  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  bool parseDelay(int64_t &Delay);
  struct OperandInfoTy {
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Val) : Val(Val) {}
  };

  struct StructuredOpField : OperandInfoTy {
    bool IsDefined = false;

    virtual ~StructuredOpField() = default;

    bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
      Parser.Error(Loc, "invalid " + Desc + ": " + Err);
      return false;
    }

    virtual bool validate(AMDGPUAsmParser &Parser) const {
        return Error(Parser, "not supported on this GPU");
        return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op,
                        OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);
                       OperandInfoTy &Width);

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand &)> Test,
                      bool SearchMandatoryLiterals = false) const;

  bool validateSOPLiteral(const MCInst &Inst) const;
  std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
  bool tryVOPD(const MCInst &Inst);
  bool tryVOPD3(const MCInst &Inst);
  bool tryAnotherVOPDEncoding(const MCInst &Inst);

  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateTensorR128(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateTrue16OpSel(const MCInst &Inst);
  bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
  bool validateVccOperand(MCRegister Reg) const;
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateDivScale(const MCInst &Inst);
                        const SMLoc &IDLoc);
                        const unsigned CPol);
  std::optional<StringRef> validateLdsDirect(const MCInst &Inst);

  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  AsmToken peekToken(bool ShouldSkipSpace = true);
  SMLoc getLoc() const;

  void onBeginOfFile() override;
  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;

  bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
                           const unsigned MaxVal, const Twine &ErrMsg,
  bool parseSwizzleOperands(const unsigned OpNum, int64_t *Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);
  bool parseSwizzleFFT(int64_t &Imm);
  bool parseSwizzleRotate(int64_t &Imm);
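  // Each parseSwizzle* helper handles one named macro accepted inside the
  // swizzle(...) operand of ds_swizzle_b32 (e.g. QUAD_PERM, BITMASK_PERM,
  // BROADCAST, SWAP, REVERSE).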
  int64_t parseGPRIdxMacro();

                      OptionalImmIndexMap &OptionalIdx);
                      OptionalImmIndexMap &OptionalIdx);
                      OptionalImmIndexMap &OptionalIdx);

  bool parseDimId(unsigned &Encoding);
  bool convertDppBoundCtrl(int64_t &BoundCtrl);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
                 bool IsDPP8 = false);
                 AMDGPUOperand::ImmTy Type);
                 bool SkipDstVcc = false,
                 bool SkipSrcVcc = false);
    return &APFloat::IEEEsingle();
    return &APFloat::IEEEdouble();
    return &APFloat::IEEEhalf();

    return &APFloat::IEEEsingle();
    return &APFloat::IEEEdouble();
    return &APFloat::IEEEhalf();
    return &APFloat::BFloat();

                                  APFloat::rmNearestTiesToEven,
  if (Status != APFloat::opOK &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  if (!isImmTy(ImmTyNone)) {

  if (type == MVT::f64 || type == MVT::i64) {
                                     AsmParser->hasInv2PiInlineImm());

                      APFloat::rmNearestTiesToEven, &Lost);

    uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
                                     AsmParser->hasInv2PiInlineImm());

        static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());

  if (type == MVT::f64 || type == MVT::i64) {
                                   AsmParser->hasInv2PiInlineImm());

        static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
        type, AsmParser->hasInv2PiInlineImm());

      static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
bool AMDGPUOperand::isLiteralImm(MVT type) const {

  if (!isImmTy(ImmTyNone)) {

      (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();

  if (type == MVT::f64 && hasFPModifiers()) {

  if (type == MVT::f64) {

  if (type == MVT::i64) {

  MVT ExpectedType = (type == MVT::v2f16)   ? MVT::f16
                     : (type == MVT::v2i16) ? MVT::f32
                     : (type == MVT::v2f32) ? MVT::f32
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
}

template <bool IsFake16>
bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
  return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
                             : AMDGPU::VGPR_16_Lo128RegClassID);
}

template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
  return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
                             : AMDGPU::VGPR_16RegClassID);
}
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const { return isSDWAOperand(MVT::f16); }

bool AMDGPUOperand::isSDWAFP32Operand() const { return isSDWAOperand(MVT::f32); }

bool AMDGPUOperand::isSDWAInt16Operand() const { return isSDWAOperand(MVT::i16); }

bool AMDGPUOperand::isSDWAInt32Operand() const { return isSDWAOperand(MVT::i32); }

bool AMDGPUOperand::isBoolReg() const {
  auto FB = AsmParser->getFeatureBits();
  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
}
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N,
                                   bool ApplyModifiers) const {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                             isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
                                         bool ApplyModifiers) const {
  const auto &InstDesc = AsmParser->getMII()->get(Inst.getOpcode());

  if (ApplyModifiers) {
    Val = applyInputFPModifiers(Val, Size);

  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;

                           AsmParser->hasInv2PiInlineImm())) {

    bool HasMandatoryLiteral =

    if (Literal.getLoBits(32) != 0 &&
        (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
        !HasMandatoryLiteral) {
      const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
      Val &= 0xffffffff00000000u;
    }
    setImmKindLiteral();

    setImmKindMandatoryLiteral();

    if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
      setImmKindLiteral();

                           APFloat::rmNearestTiesToEven, &lost);
    uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      setImmKindMandatoryLiteral();
      setImmKindLiteral();

                           AsmParser->hasInv2PiInlineImm())) {
    setImmKindLiteral();

    if (!AsmParser->has64BitLiterals() || getModifiers().Lit)
    setImmKindLiteral();

    if (!AsmParser->has64BitLiterals()) {
      Val = static_cast<uint64_t>(Val) << 32;

    if (getModifiers().Lit ||
        (!getModifiers().Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
      Val = static_cast<uint64_t>(Val) << 32;
    setImmKindLiteral();

    setImmKindLiteral();

                           AsmParser->hasInv2PiInlineImm())) {
    setImmKindLiteral();

                           AsmParser->hasInv2PiInlineImm())) {
    setImmKindLiteral();

                           AsmParser->hasInv2PiInlineImm()));
                           AsmParser->hasInv2PiInlineImm()));

    setImmKindMandatoryLiteral();
    setImmKindMandatoryLiteral();
    if ((isInt<32>(Val) || isUInt<32>(Val)) && !getModifiers().Lit64)
    setImmKindMandatoryLiteral();
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {

bool AMDGPUOperand::isInlineValue() const {

void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
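// Maps a register kind and a width in bits to the matching register class ID.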
  if (Is == IS_VGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32: return AMDGPU::VGPR_32RegClassID;
    case 64: return AMDGPU::VReg_64RegClassID;
    case 96: return AMDGPU::VReg_96RegClassID;
    case 128: return AMDGPU::VReg_128RegClassID;
    case 160: return AMDGPU::VReg_160RegClassID;
    case 192: return AMDGPU::VReg_192RegClassID;
    case 224: return AMDGPU::VReg_224RegClassID;
    case 256: return AMDGPU::VReg_256RegClassID;
    case 288: return AMDGPU::VReg_288RegClassID;
    case 320: return AMDGPU::VReg_320RegClassID;
    case 352: return AMDGPU::VReg_352RegClassID;
    case 384: return AMDGPU::VReg_384RegClassID;
    case 512: return AMDGPU::VReg_512RegClassID;
    case 1024: return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
    default: return -1;
    case 32: return AMDGPU::TTMP_32RegClassID;
    case 64: return AMDGPU::TTMP_64RegClassID;
    case 128: return AMDGPU::TTMP_128RegClassID;
    case 256: return AMDGPU::TTMP_256RegClassID;
    case 512: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32: return AMDGPU::SGPR_32RegClassID;
    case 64: return AMDGPU::SGPR_64RegClassID;
    case 96: return AMDGPU::SGPR_96RegClassID;
    case 128: return AMDGPU::SGPR_128RegClassID;
    case 160: return AMDGPU::SGPR_160RegClassID;
    case 192: return AMDGPU::SGPR_192RegClassID;
    case 224: return AMDGPU::SGPR_224RegClassID;
    case 256: return AMDGPU::SGPR_256RegClassID;
    case 288: return AMDGPU::SGPR_288RegClassID;
    case 320: return AMDGPU::SGPR_320RegClassID;
    case 352: return AMDGPU::SGPR_352RegClassID;
    case 384: return AMDGPU::SGPR_384RegClassID;
    case 512: return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32: return AMDGPU::AGPR_32RegClassID;
    case 64: return AMDGPU::AReg_64RegClassID;
    case 96: return AMDGPU::AReg_96RegClassID;
    case 128: return AMDGPU::AReg_128RegClassID;
    case 160: return AMDGPU::AReg_160RegClassID;
    case 192: return AMDGPU::AReg_192RegClassID;
    case 224: return AMDGPU::AReg_224RegClassID;
    case 256: return AMDGPU::AReg_256RegClassID;
    case 288: return AMDGPU::AReg_288RegClassID;
    case 320: return AMDGPU::AReg_320RegClassID;
    case 352: return AMDGPU::AReg_352RegClassID;
    case 384: return AMDGPU::AReg_384RegClassID;
    case 512: return AMDGPU::AReg_512RegClassID;
    case 1024: return AMDGPU::AReg_1024RegClassID;
    }
  }
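// Well-known special register names accepted by the assembler, mapped to
// their physical registers.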
      .Case("exec", AMDGPU::EXEC)
      .Case("vcc", AMDGPU::VCC)
      .Case("flat_scratch", AMDGPU::FLAT_SCR)
      .Case("xnack_mask", AMDGPU::XNACK_MASK)
      .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
      .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
      .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("lds_direct", AMDGPU::LDS_DIRECT)
      .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
      .Case("m0", AMDGPU::M0)
      .Case("vccz", AMDGPU::SRC_VCCZ)
      .Case("src_vccz", AMDGPU::SRC_VCCZ)
      .Case("execz", AMDGPU::SRC_EXECZ)
      .Case("src_execz", AMDGPU::SRC_EXECZ)
      .Case("scc", AMDGPU::SRC_SCC)
      .Case("src_scc", AMDGPU::SRC_SCC)
      .Case("tba", AMDGPU::TBA)
      .Case("tma", AMDGPU::TMA)
      .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
      .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
      .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
      .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
      .Case("vcc_lo", AMDGPU::VCC_LO)
      .Case("vcc_hi", AMDGPU::VCC_HI)
      .Case("exec_lo", AMDGPU::EXEC_LO)
      .Case("exec_hi", AMDGPU::EXEC_HI)
      .Case("tma_lo", AMDGPU::TMA_LO)
      .Case("tma_hi", AMDGPU::TMA_HI)
      .Case("tba_lo", AMDGPU::TBA_LO)
      .Case("tba_hi", AMDGPU::TBA_HI)
      .Case("pc", AMDGPU::PC_REG)
      .Case("null", AMDGPU::SGPR_NULL)
bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R)
    return true;
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();

  return ParseRegister(Reg, StartLoc, EndLoc, false);

  bool Result = ParseRegister(Reg, StartLoc, EndLoc, true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind,
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
    Error(Loc, "register does not fit in the list");
    if (Reg1 != Reg + RegWidth / 32) {
      Error(Loc, "registers in a list must have consecutive indices");

    {{"ttmp"}, IS_TTMP},

  return Kind == IS_VGPR ||
    if (Str.starts_with(Reg.Name))
  return !Str.getAsInteger(10, Num);
AMDGPUAsmParser::isRegister(const AsmToken &Token,
  if (!RegSuffix.empty()) {

AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}
MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
                                          unsigned SubReg, unsigned RegWidth,
  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");

  unsigned RegIdx = RegNum / AlignSize;
    Error(Loc, "invalid or unsupported register size");
    Error(Loc, "register index is out of range");
    assert(Reg && "Invalid subregister!");
bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
  int64_t RegLo, RegHi;

  SMLoc FirstIdxLoc = getLoc();
    SecondIdxLoc = getLoc();

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");

  if (RegHi == RegLo) {
    if (RegSuffix == ".l") {
    } else if (RegSuffix == ".h") {

  Num = static_cast<unsigned>(RegLo);
  RegWidth = 32 * ((RegHi - RegLo) + 1);
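// Register ranges use the v[lo:hi] / s[lo:hi] / ttmp[lo:hi] syntax; the
// resulting width is 32 bits per register in the range.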
MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
  RegKind = IS_SPECIAL;

MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
  auto Loc = getLoc();
    Error(Loc, "invalid register name");

  unsigned SubReg = NoSubRegister;
  if (!RegSuffix.empty()) {
      Error(Loc, "invalid register index");

  if (!ParseRegRange(RegNum, RegWidth, SubReg))

  return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
                                         unsigned &RegNum, unsigned &RegWidth,
  auto ListLoc = getLoc();
                "expected a register or a list of registers")) {

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
  if (RegWidth != 32) {
    Error(Loc, "expected a single 32-bit register");

  RegisterKind NextRegKind;
  unsigned NextRegNum, NextRegWidth;

    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
    if (NextRegWidth != 32) {
      Error(Loc, "expected a single 32-bit register");
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
                "expected a comma or a closing square bracket")) {

  Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
  auto Loc = getLoc();
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
    assert(Parser.hasPendingError());

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
            " register not available on this GPU");

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
                                          bool RestoreOnFailure) {
  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
std::optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
    return StringRef(".amdgcn.next_free_vgpr");
    return StringRef(".amdgcn.next_free_sgpr");
  return std::nullopt;

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setRedefinable(true);
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
    KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
                                  bool HasSP3AbsModifier, bool HasLit,
  if (isRegister() || isModifier())

  if (!HasLit && !HasLit64) {
    HasLit64 = trySkipId("lit64");
    HasLit = !HasLit64 && trySkipId("lit");
    if (HasLit || HasLit64) {

  const auto &Tok = getToken();
  const auto &NextTok = peekToken();

  bool Negate = false;

  AMDGPUOperand::Modifiers Mods;
  Mods.Lit64 = HasLit64;

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
      RealVal.changeSign();

        AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                                 AMDGPUOperand::ImmTyNone, true));
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);

    if (HasSP3AbsModifier) {
      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
      if (Parser.parseExpression(Expr))

    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      Op.setModifiers(Mods);
      if (HasLit || HasLit64)
      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));

  if (auto R = parseRegister()) {

                                       bool HasSP3AbsMod, bool HasLit,
  return parseImm(Operands, HasSP3AbsMod, HasLit, HasLit64);
AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return str == "abs" || str == "neg" || str == "sext";

AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token,
                                         const AsmToken &NextToken) const {

AMDGPUAsmParser::isOperandModifier(const AsmToken &Token,
                                   const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);

AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);

AMDGPUAsmParser::isModifier() {
  peekTokens(NextToken);
  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) &&
          isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
AMDGPUAsmParser::parseSP3NegModifier() {
  peekTokens(NextToken);
      (isRegister(NextToken[0], NextToken[1]) ||
       isId(NextToken[0], "abs"))) {

    return Error(getLoc(), "invalid syntax, expected 'neg' modifier");

  SP3Neg = parseSP3NegModifier();

  Neg = trySkipId("neg");
    return Error(Loc, "expected register or immediate");

  Abs = trySkipId("abs");

  Lit64 = trySkipId("lit64");
    if (!has64BitLiterals())
      return Error(Loc, "lit64 is not supported on this GPU");

  Lit = !Lit64 && trySkipId("lit");
    return Error(Loc, "expected register or immediate");

  Res = parseRegOrImm(Operands, SP3Abs, Lit, Lit64);

    return (SP3Neg || Neg || SP3Abs || Abs || Lit || Lit64)

  if ((Lit || Lit64) && !Operands.back()->isImm())
    Error(Loc, "expected immediate with lit modifier");

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))

  if ((Lit || Lit64) &&

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers() || Lit || Lit64) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
3587 bool Sext = trySkipId(
"sext");
3588 if (Sext && !skipToken(
AsmToken::LParen,
"expected left paren after sext"))
3603 AMDGPUOperand::Modifiers Mods;
3606 if (Mods.hasIntModifiers()) {
3607 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3609 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3610 Op.setModifiers(Mods);
3617 return parseRegOrImmWithFPInputMods(
Operands,
false);
3621 return parseRegOrImmWithIntInputMods(
Operands,
false);
3625 auto Loc = getLoc();
3626 if (trySkipId(
"off")) {
3627 Operands.push_back(AMDGPUOperand::CreateImm(
this, 0, Loc,
3628 AMDGPUOperand::ImmTyOff,
false));
3635 std::unique_ptr<AMDGPUOperand>
Reg = parseRegister();
3637 Operands.push_back(std::move(Reg));
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  // ...
    return Match_InvalidOperand;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // ...
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    // ...
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  // ...
  if (tryAnotherVOPDEncoding(Inst))
    return Match_InvalidOperand;

  return Match_Success;
}

// ...
  static const unsigned Variants[] = {
      // ...
  };
  // ...
  if (isForcedDPP() && isForcedVOP3()) {
    // ...
  }
  if (getForcedEncodingSize() == 32) {
    // ...
  }
  if (isForcedVOP3()) {
    // ...
  }
  if (isForcedSDWA()) {
    // ...
  }
  if (isForcedDPP()) {
    // ...
  }
  // ...

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
    // ...
  if (getForcedEncodingSize() == 32)
    // ...
  // ...
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  // ...
  case AMDGPU::FLAT_SCR:
  // ...
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
    // ...
  // ...
  return AMDGPU::NoRegister;
}
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  // ...
  int64_t Val = MO.getImm();
  // ...
}

unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  // ...
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    // ...
}

// Collect the source operand indices: VOPD components use the
// src0X/vsrc1X/.../vsrc2Y operand names, everything else src0/src1/src2.
static OperandIndices getSrcOperandIndices(unsigned Opcode,
                                           bool AddMandatoryLiterals = false) {
  // ...
      AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
  // ...
      AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;

    return {getNamedOperandIdx(Opcode, OpName::src0X),
            getNamedOperandIdx(Opcode, OpName::vsrc1X),
            getNamedOperandIdx(Opcode, OpName::vsrc2X),
            getNamedOperandIdx(Opcode, OpName::src0Y),
            getNamedOperandIdx(Opcode, OpName::vsrc1Y),
            getNamedOperandIdx(Opcode, OpName::vsrc2Y),
            // ...
  // ...
  return {getNamedOperandIdx(Opcode, OpName::src0),
          getNamedOperandIdx(Opcode, OpName::src1),
          getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
}

bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  // ...
  return !isInlineConstant(Inst, OpIdx);
}

// ...
  return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
// ...

// ...
  const unsigned Opcode = Inst.getOpcode();
  if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
    // ...
  if (!LaneSelOp.isReg())
    // ...
  return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
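// Constant-bus validation: VALU instructions may read only a limited number
// of scalar sources (SGPRs and literals) per instruction. The check below
// counts implicit SGPR reads, unique explicit SGPRs, and literal operands,
// then compares the total against getConstantBusLimit() for the opcode.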
bool AMDGPUAsmParser::validateConstantBusLimitations(
    const MCInst &Inst, const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  // ...
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  if (!(Desc.TSFlags &
        // ...

  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed != AMDGPU::NoRegister) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;
  }

  // ...
  for (int OpIdx : OpIndices) {
    // ...
    if (usesConstantBus(Inst, OpIdx)) {
      // ...
      if (SGPRsUsed.insert(LastSGPR).second) {
        ++ConstantBusUseCount;
      }
      // ...
      if (NumLiterals == 0) {
        // ...
      } else if (LiteralSize != Size) {
        // ...
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  // ...
  Error(Loc, "invalid operand (violates constant bus restrictions)");
  return false;
}

std::optional<unsigned>
AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst,
                                             bool AsVOPD3) {
  const unsigned Opcode = Inst.getOpcode();
  // ...
  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
    // ...
  };

  bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
  // ...
  for (auto OpName : {OpName::src0X, OpName::src0Y}) {
    int I = getNamedOperandIdx(Opcode, OpName);
    // ...
    int64_t Imm = Op.getImm();
    // ...
  }

  for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
                      OpName::vsrc2Y, OpName::imm}) {
    int I = getNamedOperandIdx(Opcode, OpName);
    // ...
  }

  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
      getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
  // ...
  return InvalidCompOprIdx;
}
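// VOPD validation: the X and Y halves of a dual-issue instruction must obey
// register-bank constraints (distinct or even/odd dst registers, sources in
// different VGPR banks). checkVOPDRegBankConstraints() returns the index of
// the first offending component operand, if any.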
bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
                                   const OperandVector &Operands) {
  // ...
  for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
    AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
    if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
        // ...
      Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
    // ...
  }

  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
  if (!InvalidCompOprIdx.has_value())
    return true;

  auto CompOprIdx = *InvalidCompOprIdx;
  // ...
      std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
  // ...
  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
    // ...
      Error(Loc, "dst registers must be distinct");
    // ...
      Error(Loc, "one dst register must be even and the other odd");
  } else {
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
    // ...
          " operands must use different VGPR banks");
  }
  // ...
}

bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
  // ...
  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
  if (!InvalidCompOprIdx.has_value())
    // ...

  InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
  if (InvalidCompOprIdx.has_value()) {
    // ...
    if (*InvalidCompOprIdx == VOPD::Component::DST)
      // ...
  }
  // ...
}

bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  // ...
  for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
                      OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
                      OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
    int I = getNamedOperandIdx(Opcode, OpName);
    // ...
  }

  return !tryVOPD3(Inst);
}

bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  // ...
    return tryVOPD(Inst);
  return tryVOPD3(Inst);
}
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
  // ...
  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
  // ...
}

bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {
  // ...
  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
  // ...
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
  // ...
  bool IsPackedD16 = false;
  // ...
    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
    IsPackedD16 = D16Idx >= 0;
    // ...
      DataSize = (DataSize + 1) / 2;
  // ...
  if ((VDataSize / 4) == DataSize + TFESize)
    return true;

  // ...
    Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
  // ...
    Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";

  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
  return false;
}

bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {
  // ...
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
  // ...
                          ? AMDGPU::OpName::srsrc
                          : AMDGPU::OpName::rsrc;
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
  // ...
  assert(SrsrcIdx > VAddr0Idx);

  if (BaseOpcode->BVH) {
    if (IsA16 == BaseOpcode->A16)
      return true;
    Error(IDLoc, "image address size does not match a16");
    return false;
  }

  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx
            : /* ... */;
  unsigned ExpectedAddrSize =
      /* ... */;

  if (hasPartialNSAEncoding() && /* ... */) {
    int VAddrLastIdx = SrsrcIdx - 1;
    unsigned VAddrLastSize =
        /* ... */;
    ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
  }
  // ...
    if (ExpectedAddrSize > 12)
      ExpectedAddrSize = 16;
  // ...
  if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
    return true;

  if (ActualAddrSize == ExpectedAddrSize)
    return true;

  Error(IDLoc, "image address size does not match dim and a16");
  return false;
}

bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
  // ...
  if (!Desc.mayLoad() || !Desc.mayStore())
    return true;

  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  // ...
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
}

bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
  // ...
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  // ...
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}

bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
                                      const OperandVector &Operands) {
  // ...
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    // ...
  }
  // ...
}

bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
  // ...
  if (!BaseOpcode->MSAA)
    return true;

  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  // ...
  return DimInfo->MSAA;
}
// ...
  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
    // ...
// ...

bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
                                      const OperandVector &Operands) {
  // ...
  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  // ...
  Error(ErrLoc, "source operand must be a VGPR");
  return false;
}

bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
                                          const OperandVector &Operands) {
  // ...
  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  // ...
        "source operand must be either a VGPR or an inline constant");
  // ...
}

bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
                                      const OperandVector &Operands) {
  // ...
      !getFeatureBits()[FeatureMFMAInlineLiteralBug])
    // ...
  const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
  // ...
  if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
    // ...
          "inline constants are not allowed for this operand");
    // ...
  }
  // ...
}

bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  // ...
  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  if (BlgpIdx != -1) {
    // ...
    int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
    // ...
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    // ...
            "wrong register tuple size for cbsz value " + Twine(CBSZ));
    // ...
    int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
    // ...
            "wrong register tuple size for blgp value " + Twine(BLGP));
    // ...
  }

  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
  // ...
  if (Src2Reg == DstReg)
    return true;

  if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
    return true;

  if (TRI->regsOverlap(Src2Reg, DstReg)) {
    // ...
        "source 2 operand must not partially overlap with dst");
    return false;
  }

  return true;
}

bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
  // ...
  case V_DIV_SCALE_F32_gfx6_gfx7:
  case V_DIV_SCALE_F32_vi:
  case V_DIV_SCALE_F32_gfx10:
  case V_DIV_SCALE_F64_gfx6_gfx7:
  case V_DIV_SCALE_F64_vi:
  case V_DIV_SCALE_F64_gfx10:
    // ...

  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
    // ...
  }
  // ...
}

bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
  // ...
  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
  // ...
}

bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
  // ...
  int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
  // ...
}
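// Opcode list for the *REV (reversed-operand) sub/shift instructions across
// all encodings and subtargets; these take their operands in swapped order
// relative to their non-REV counterparts.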
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    // ...
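// lds_direct validation: LDS_DIRECT may appear only on targets and encodings
// that support it, and only as the src0 operand.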
std::optional<StringRef>
AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  // ...
  if ((Desc.TSFlags & Enc) == 0)
    return std::nullopt;

  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
    // ...
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
      // ...
        return StringRef("lds_direct is not supported on this GPU");
      // ...
        return StringRef("lds_direct cannot be used with this instruction");
      if (SrcName != OpName::src0)
        return StringRef("lds_direct may be used as src0 only");
    }
  }

  return std::nullopt;
}

// ...
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isFlatOffset())
      return Op.getStartLoc();
  }
// ...

bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
                                     const OperandVector &Operands) {
  // ...
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  // ...
    return validateFlatOffset(Inst, Operands);
  // ...
    return validateSMEMOffset(Inst, Operands);
  // ...
    const unsigned OffsetSize = 24;
    if (!isIntN(OffsetSize, Op.getImm())) {
      // ...
            Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
      // ...
    }
  // ...
    const unsigned OffsetSize = 16;
    if (!isUIntN(OffsetSize, Op.getImm())) {
      // ...
            Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
      // ...
    }
  // ...
}

bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  // ...
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    // ...
          "flat offset modifier is not supported on this GPU");
    // ...
  }
  // ...
  bool AllowNegative =
      // ...
  if (!isIntN(OffsetSize, Op.getImm()) ||
      (!AllowNegative && Op.getImm() < 0)) {
    // ...
          Twine("expected a ") +
              (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
                             : Twine(OffsetSize - 1) + "-bit unsigned offset"));
    // ...
  }
  // ...
}

// ...
  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
      return Op.getStartLoc();
  }
// ...

bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  // ...
            : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
                                   : "expected a 21-bit signed offset");
  // ...
}
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
  // ...
  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  // ...
  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  // ...
  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      break;
    // ...
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      // ...
      if (NumLiterals == 0 || LiteralValue != Value) {
        // ...
      }
    } else if (MO.isExpr()) {
      // ...
    }
  }

  return NumLiterals + NumExprs <= 1;
}

bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  // ...
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    // ...
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    if (OpSelIdx != -1) {
      // ...
    }
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
    if (OpSelHiIdx != -1) {
      // ...
    }
  // ...
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    // ...
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
    // ...
    auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
      // ...
      return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
    };
    // ...
    int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    if (Src2Idx != -1) {
      // ...
    }
  // ...
}

bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
  if (!hasTrue16Insts())
    // ...
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  // ...
  if (OpSelOpValue == 0)
    // ...
  unsigned OpCount = 0;
  for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
    // ...
        MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
      // ...
      bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
      if (OpSelOpIsHi != VGPRSuffixIsHi)
        // ...
    }
  }
  // ...
}
bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
  // ...
  int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
  // ...
  const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
                                     AMDGPU::OpName::src1_modifiers,
                                     AMDGPU::OpName::src2_modifiers};
  // ...
  for (unsigned i = 0; i < 3; ++i) {
    // ...
  }
  // ...
}

bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
                                  const OperandVector &Operands) {
  // ...
  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
  if (DppCtrlIdx >= 0) {
    // ...
      Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
                         : "DP ALU dpp only supports row_newbcast");
    // ...
  }

  int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
  bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
  // ...
    int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
    // ...
        Error(S, "invalid operand for instruction");
      // ...
          "src1 immediate operand invalid for instruction");
  // ...
}

bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
  auto FB = getFeatureBits();
  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
         (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
}

bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
  // ...
      !HasMandatoryLiteral && !isVOPD(Opcode))
    // ...

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  // ...
  for (int OpIdx : OpIndices) {
    // ...
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      // ...
                                     HasMandatoryLiteral);
      // ...
      if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
          !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
        Error(getLitLoc(Operands), "invalid operand for instruction");
        // ...
      }
      // ...
      if (IsFP64 && IsValid32Op && !IsForcedFP64)
        // ...
      if (NumLiterals == 0 || LiteralValue != Value) {
        // ...
      }
    } else if (MO.isExpr()) {
      // ...
    }
  }
  NumLiterals += NumExprs;
  // ...
  if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    // ...
  }

  if (NumLiterals > 1) {
    Error(getLitLoc(Operands, true),
          "only one unique literal operand is allowed");
    // ...
  }
  // ...
// ...
  auto Reg = Sub ? Sub : Op.getReg();
  // ...
  return AGPR32.contains(Reg) ? 1 : 0;
// ...

bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  // ...
                          ? AMDGPU::OpName::data0
                          : AMDGPU::OpName::vdata;
  // ...
  if (Data2Areg >= 0 && Data2Areg != DataAreg)
    // ...

  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    if (DataAreg < 0 || DstAreg < 0)
      // ...
    return DstAreg == DataAreg;
  }

  return DstAreg < 1 && DataAreg < 1;
}

bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureGFX90AInsts] && !FB[AMDGPU::FeatureGFX1250Insts])
    // ...
  if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
    // ...
  if (FB[AMDGPU::FeatureGFX1250Insts]) {
    // ...
    case AMDGPU::DS_LOAD_TR6_B96:
    case AMDGPU::DS_LOAD_TR6_B96_gfx12:
      // ...
    case AMDGPU::GLOBAL_LOAD_TR6_B96:
    case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
      // ...
      int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
      if (VAddrIdx != -1) {
        // ...
        if ((Sub - AMDGPU::VGPR0) & 1)
          // ...
      }
      // ...
    }
    case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
    case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
      // ...
  }
  // ...
}

// ...
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    // ...
      return Op.getStartLoc();
  }
// ...

bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
                                   const OperandVector &Operands) {
  // ...
  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  // ...
  auto FB = getFeatureBits();
  bool UsesNeg = false;
  if (FB[AMDGPU::FeatureGFX940Insts]) {
    // ...
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
      // ...
  }

  if (IsNeg == UsesNeg)
    // ...

  // ...
        UsesNeg ? "invalid modifier: blgp is not supported"
                : "invalid modifier: neg is not supported");
  // ...
}
bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
                                      const OperandVector &Operands) {
  // ...
  if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
    // ...

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
  // ...
  if (Reg == AMDGPU::SGPR_NULL)
    // ...
  Error(RegLoc, "src0 must be null");
  return false;
}

bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
                                 const OperandVector &Operands) {
  // ...
    return validateGWS(Inst, Operands);
  // ...
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
  // ...
  Error(S, "gds modifier is not supported on this GPU");
  return false;
}

bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
                                  const OperandVector &Operands) {
  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
    // ...
  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
    // ...
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
  // ...
  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
  // ...
  Error(RegLoc, "vgpr must be even aligned");
  return false;
}

bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            const SMLoc &IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  // ...
    Error(S, "scale_offset is not supported on this GPU");
  // ...
    Error(S, "nv is not supported on this GPU");
  // ...
    Error(S, "scale_offset is not supported for this instruction");
  // ...
    return validateTHAndScopeBits(Inst, Operands, CPol);
  // ...
    Error(S, "cache policy is not supported for SMRD instructions");
  // ...
    Error(IDLoc, "invalid cache policy for SMEM instruction");
  // ...
  if (!(TSFlags & AllowSCCModifier)) {
    // ...
          "scc modifier is not supported for this instruction on this GPU");
    // ...
  }
  // ...
            : "instruction must use glc");
  // ...
          &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
  // ...
            : "instruction must not use glc");
  // ...
}

bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
                                             const OperandVector &Operands,
                                             const unsigned CPol) {
  // ...
  const unsigned Opcode = Inst.getOpcode();
  // ...
    return PrintError("instruction must use th:TH_ATOMIC_RETURN");
  // ...
    return PrintError("invalid th value for SMEM instruction");
  // ...
    return PrintError("scope and th combination is not valid");
  // ...
    return PrintError("invalid th value for atomic instructions");
  // ...
    return PrintError("invalid th value for store instructions");
  // ...
    return PrintError("invalid th value for load instructions");
  // ...
}

bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
                                  const OperandVector &Operands) {
  // ...
  if (Desc.mayStore() &&
      // ...
    Error(Loc, "TFE modifier has no meaning for store instructions");
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  // ...
  auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
    int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
    // ...
    int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
    // ...
        TRI->getRegClass(Desc.operands()[SrcIdx].RegClass).getSizeInBits();
    // ...
    static const char *FmtNames[] = {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
                                     "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
                                     // ...
    // ...
          "wrong register tuple size for " + Twine(FmtNames[Fmt]));
    // ...
  };

  return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
         validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
}
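// validateInstruction() is the central dispatcher: it runs each validator
// above against the matched MCInst and reports the corresponding diagnostic
// at IDLoc (or at a more precise operand location) on the first failure.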
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                          const SMLoc &IDLoc,
                                          const OperandVector &Operands) {
  if (auto ErrMsg = validateLdsDirect(Inst)) {
    // ...
    return false;
  }
  if (!validateTrue16OpSel(Inst)) {
    Error(/* ... */,
          "op_sel operand conflicts with 16-bit operand suffix");
    return false;
  }
  if (!validateSOPLiteral(Inst)) {
    Error(/* ... */,
          "only one unique literal operand is allowed");
    return false;
  }
  if (!validateVOPLiteral(Inst, Operands)) {
    return false;
  }
  if (!validateConstantBusLimitations(Inst, Operands)) {
    return false;
  }
  if (!validateVOPD(Inst, Operands)) {
    return false;
  }
  if (!validateIntClampSupported(Inst)) {
    Error(/* ... */,
          "integer clamping is not supported on this GPU");
    return false;
  }
  if (!validateOpSel(Inst)) {
    Error(/* ... */, "invalid op_sel operand");
    return false;
  }
  if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
    Error(/* ... */, "invalid neg_lo operand");
    return false;
  }
  if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
    Error(/* ... */, "invalid neg_hi operand");
    return false;
  }
  if (!validateDPP(Inst, Operands)) {
    return false;
  }
  if (!validateMIMGD16(Inst)) {
    Error(/* ... */,
          "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGDim(Inst, Operands)) {
    Error(IDLoc, "missing dim operand");
    return false;
  }
  if (!validateTensorR128(Inst)) {
    Error(/* ... */,
          "instruction must set modifier r128=0");
    return false;
  }
  if (!validateMIMGMSAA(Inst)) {
    Error(/* ... */,
          "invalid dim; must be MSAA type");
    return false;
  }
  if (!validateMIMGDataSize(Inst, IDLoc)) {
    return false;
  }
  if (!validateMIMGAddrSize(Inst, IDLoc))
    return false;
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(/* ... */, "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(/* ... */,
          "invalid image_gather dmask: only one bit must be set");
    return false;
  }
  if (!validateMovrels(Inst, Operands)) {
    return false;
  }
  if (!validateOffset(Inst, Operands)) {
    return false;
  }
  if (!validateMAIAccWrite(Inst, Operands)) {
    return false;
  }
  if (!validateMAISrc2(Inst, Operands)) {
    return false;
  }
  if (!validateMFMA(Inst, Operands)) {
    return false;
  }
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
    return false;
  }
  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
                     ? "invalid register class: data and dst should be all VGPR or AGPR"
                     : "invalid register class: agpr loads and stores not supported on this GPU"
          /* ... */);
    return false;
  }
  if (!validateVGPRAlign(Inst)) {
    Error(/* ... */,
          "invalid register class: vgpr tuples must be 64 bit aligned");
    return false;
  }
  // ...
  if (!validateBLGP(Inst, Operands)) {
    return false;
  }
  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
    return false;
  }
  if (!validateWaitCnt(Inst, Operands)) {
    return false;
  }
  if (!validateTFE(Inst, Operands)) {
    return false;
  }
  if (!validateWMMA(Inst, Operands)) {
    return false;
  }

  return true;
}
// ...
                                           unsigned VariantID = 0);
// ...
                                unsigned VariantID);

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       // ...

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       // ...
  for (auto Variant : Variants) {
    // ...
  }
  // ...

bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  const SMLoc &IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
  // ...
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    // ...
  getParser().clearPendingErrors();
  // ...
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    // ...
                       " variant of this instruction is not supported"));
  }
  // ...
  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
    // ...
    FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
        .flip(AMDGPU::FeatureWavefrontSize32);
    // ...
        ComputeAvailableFeatures(FeaturesWS32);
    // ...
    if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
      return Error(IDLoc, "instruction requires wavesize=32");
  }
  // ...
    return Error(IDLoc, "instruction not supported on this GPU");
  // ...
  return Error(IDLoc, "invalid instruction" + Suggestion);
}

// ...
  const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
  if (Op.isToken() && InvalidOprIdx > 1) {
    const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
    return PrevOp.isToken() && PrevOp.getToken() == "::";
  }
// ...

bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              // ...
                                              bool MatchingInlineAsm) {
  // ...
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    // ...
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  // ...
    if (R == Match_Success || R == Match_MissingFeature ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
        (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
         Result != Match_MissingFeature)) {
      // ...
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      // ...
    }
    // ...
  }

  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    // ...
  }

  // ...
  case Match_MissingFeature:
    // ...
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    // ...
      return Error(IDLoc, "too few operands for instruction");
    // ...
    if (ErrorLoc == SMLoc())
      // ...
      return Error(ErrorLoc, "invalid VOPDY instruction");
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_MnemonicFail:
    // ...
}

bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  // ...
  if (getParser().parseAbsoluteExpression(Tmp)) {
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (!getSTI().getTargetTriple().isAMDGCN())
    return TokError("directive only supported for amdgcn architecture");

  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))
    // ...

  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(
        TargetRange.Start,
        (Twine(".amdgcn_target directive's target id ") +
         Twine(TargetIDDirective) +
         Twine(" does not match the specified target id ") +
         Twine(getTargetStreamer().getTargetID()->toString())).str());
  // ...
}

bool AMDGPUAsmParser::calculateGPRBlocks(
    // ...
    const MCExpr *FlatScrUsed, bool XNACKUsed,
    std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
    // ...
    const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
  // ...
  int64_t EvaluatedSGPRs;
  // ...
  unsigned MaxAddressableNumSGPRs =
      // ...

  if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
      !Features.test(FeatureSGPRInitBug) &&
      static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  const MCExpr *ExtraSGPRs =
      // ...

  if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
      (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
      static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  if (Features.test(FeatureSGPRInitBug))
    // ...

  auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
                                unsigned Granule) -> const MCExpr * {
    // ...
    const MCExpr *AlignToGPR =
        // ...
  };

  VGPRBlocks = GetNumGPRBlocks(
      // ...
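// .amdhsa_kernel handling: the loop below consumes one .amdhsa_* directive
// per iteration until .end_amdhsa_kernel, filling in the kernel-descriptor
// fields, then derives the VGPR/SGPR block counts and user-SGPR counts and
// emits the descriptor through the target streamer.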
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (!getSTI().getTargetTriple().isAMDGCN())
    return TokError("directive only supported for amdgcn architecture");
  // ...
    return TokError("directive only supported for amdhsa OS");

  // ...
  if (getParser().parseIdentifier(KernelName))
    // ...
  // ...
                                             &getSTI(), getContext());
  // ...
  const MCExpr *NextFreeVGPR = ZeroExpr;
  // ...
  const MCExpr *NamedBarCnt = ZeroExpr;
  // ...
  const MCExpr *NextFreeSGPR = ZeroExpr;
  // ...
  unsigned ImpliedUserSGPRCount = 0;
  // ...
  std::optional<unsigned> ExplicitUserSGPRCount;
  const MCExpr *ReserveVCC = OneExpr;
  const MCExpr *ReserveFlatScr = OneExpr;
  std::optional<bool> EnableWavefrontSize32;
  // ...
    SMRange IDRange = getTok().getLocRange();
    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
      // ...
    if (ID == ".end_amdhsa_kernel")
      break;
    // ...
      return TokError(".amdhsa_ directives cannot be repeated");

    SMLoc ValStart = getLoc();
    // ...
    if (getParser().parseExpression(ExprVal))
      // ...
    SMLoc ValEnd = getLoc();
    // ...
    bool EvaluatableExpr;
    if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
      // ...
        return OutOfRangeError(ValRange);
      // ...
    }

#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                          \
  if (!isUInt<ENTRY##_WIDTH>(Val))                                            \
    return OutOfRangeError(RANGE);                                            \
  AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY,    \
                                       getContext());

#define EXPR_RESOLVE_OR_ERROR(RESOLVED)                                       \
  if (!(RESOLVED))                                                            \
    return Error(IDRange.Start, "directive should have resolvable expression",\
                 IDRange);

    if (ID == ".amdhsa_group_segment_fixed_size") {
      // ...
        return OutOfRangeError(ValRange);
      // ...
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      // ...
        return OutOfRangeError(ValRange);
      // ...
    } else if (ID == ".amdhsa_kernarg_size") {
      // ...
        return OutOfRangeError(ValRange);
      // ...
    } else if (ID == ".amdhsa_user_sgpr_count") {
      // ...
      ExplicitUserSGPRCount = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      // ...
                     "directive is not supported with architected flat scratch",
                     // ...
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       ExprVal, ValRange);
      ImpliedUserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
      // ...
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      // ...
        return OutOfRangeError(ValRange);
      // ...
      ImpliedUserSGPRCount += Val;
      PreloadLength = Val;
      // ...
    } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
      // ...
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      // ...
        return OutOfRangeError(ValRange);
      // ...
      PreloadOffset = Val;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
                       ValRange);
      ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
                       ValRange);
      ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       ExprVal, ValRange);
      ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
                       ValRange);
      ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      // ...
                     "directive is not supported with architected flat scratch",
                     // ...
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
                       ExprVal, ValRange);
      ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       ExprVal, ValRange);
      ImpliedUserSGPRCount += 1;
    } else if (ID == ".amdhsa_uses_cu_stores") {
      // ...
        return Error(IDRange.Start, "directive requires gfx12.5", IDRange);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_USES_CU_STORES, ExprVal, ValRange);
    } else if (ID == ".amdhsa_wavefront_size32") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      EnableWavefrontSize32 = Val;
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_uses_dynamic_stack") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      // ...
                     "directive is not supported with architected flat scratch",
                     // ...
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_enable_private_segment") {
      // ...
                     "directive is not supported without architected flat scratch",
                     // ...
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      NextFreeVGPR = ExprVal;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = ExprVal;
    } else if (ID == ".amdhsa_accum_offset") {
      // ...
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      AccumOffset = ExprVal;
    } else if (ID == ".amdhsa_named_barrier_count") {
      // ...
        return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
      NamedBarCnt = ExprVal;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (EvaluatableExpr && !isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = ExprVal;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
      // ...
                     "directive is not supported with architected flat scratch",
                     // ...
      if (EvaluatableExpr && !isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = ExprVal;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(
            IDRange.Start,
            ".amdhsa_reserve_xnack_mask does not match target id",
            // ...
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      if (IVersion.Major >= 12)
        return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      if (IVersion.Major >= 12)
        return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_tg_split") {
      // ...
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      // ...
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      // ...
                     "directive unsupported on " + getSTI().getCPU(), IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_shared_vgpr_count") {
      // ...
      if (IVersion.Major < 10 || IVersion.Major >= 12)
        return Error(IDRange.Start, "directive requires gfx10 or gfx11",
                     // ...
      SharedVGPRCount = Val;
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
                       COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_inst_pref_size") {
      if (IVersion.Major < 11)
        return Error(IDRange.Start, "directive requires gfx11+", IDRange);
      if (IVersion.Major == 11) {
        PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
                         COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
                         ValRange);
      } else {
        PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
                         COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
                         ValRange);
      }
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
          ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
          ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       ExprVal, ValRange);
    } else if (ID == ".amdhsa_round_robin_scheduling") {
      if (IVersion.Major < 12)
        return Error(IDRange.Start, "directive requires gfx12+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
                       ValRange);
    } else {
      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
    }
#undef PARSE_BITS_ENTRY
  // ...

  if (!Seen.contains(".amdhsa_next_free_vgpr"))
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (!Seen.contains(".amdhsa_next_free_sgpr"))
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
  // ...
  if (PreloadLength) {
    // ...
  }

  const MCExpr *VGPRBlocks;
  const MCExpr *SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
                         EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         // ...

  int64_t EvaluatedVGPRBlocks;
  bool VGPRBlocksEvaluatable =
      VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
  if (VGPRBlocksEvaluatable &&
      !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
    return OutOfRangeError(VGPRRange);
  }
  // ...
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());

  int64_t EvaluatedSGPRBlocks;
  if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
      !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          static_cast<uint64_t>(EvaluatedSGPRBlocks)))
    return OutOfRangeError(SGPRRange);
  // ...
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());

  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError("amdgpu_user_sgpr_count smaller than implied by "
                    "enabled user SGPRs");

  // ...
    if (!isUInt<COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
      return TokError("too many user SGPRs enabled");
    // ...
        COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
        COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
  // ...
    if (!isUInt<COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_WIDTH>(
            // ...
      return TokError("too many user SGPRs enabled");
    // ...
        COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
        COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());

  // ...
    return TokError("Kernarg size should be resolvable");

  if (PreloadLength && kernarg_size &&
      (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
    return TokError("Kernarg preload length + offset is larger than the "
                    "kernarg segment size");

  // ...
    if (!Seen.contains(".amdhsa_accum_offset"))
      return TokError(".amdhsa_accum_offset directive is required");
    int64_t EvaluatedAccum;
    bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
    uint64_t UEvaluatedAccum = EvaluatedAccum;
    if (AccumEvaluatable &&
        (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
      return TokError("accum_offset should be in range [4..256] in "
                      // ...

    int64_t EvaluatedNumVGPR;
    if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
        // ...
      return TokError("accum_offset exceeds total VGPR allocation");
    // ...
        COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
        COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
        // ...

  // ...
        COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
        COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
        // ...

  if (IVersion.Major >= 10 && IVersion.Major < 12) {
    // ...
    if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
      return TokError("shared_vgpr_count directive not valid on "
                      "wavefront size 32");
    }
    // ...
    if (VGPRBlocksEvaluatable &&
        (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
         // ...
      return TokError("shared_vgpr_count*2 + "
                      "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
                      // ...
  }

  getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
                                                 NextFreeVGPR, NextFreeSGPR,
                                                 ReserveVCC, ReserveFlatScr);
  // ...
}
bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
  // ...
  if (ParseAsAbsoluteExpression(Version))
    return true;

  getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
  return false;
}

bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               AMDGPUMCKernelCodeT &C) {
  // ...
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    // ...
  }
  // ...
  if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
    return TokError(Err.str());
  }
  // ...
  if (ID == "enable_wavefront_size32") {
    // ...
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    // ...
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
  }
  // ...
  if (ID == "wavefront_size") {
    if (C.wavefront_size == 5) {
      // ...
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (C.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }
  // ...
}

bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  // ...
    if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
      return true;

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, KernelCode))
      return true;
  // ...
  KernelCode.validate(&getSTI(), getContext());
  getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
  // ...
}

bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  // ...
  if (!parseId(KernelName, "expected symbol name"))
    return true;

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           // ...
  KernelScope.initialize(getContext());
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (!getSTI().getTargetTriple().isAMDGCN()) {
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 // ...
  }
  // ...
  auto TargetIDDirective = getLexer().getTok().getStringContents();
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return Error(getParser().getTok().getLoc(), "target id must match options");

  getTargetStreamer().EmitISAVersion();
  // ...
}

bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  // ...
  std::string HSAMetadataString;
  // ...
  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
    return Error(getLoc(), "invalid HSA metadata");
  // ...
}
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {
  // ...
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  // ...
      CollectStream << getTokenStr();
      // ...
    if (trySkipId(AssemblerDirectiveEnd)) {
      // ...
    }

    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  // ...
  getLexer().setSkipSpace(true);
  // ...
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  // ...
}

bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
  // ...
  auto *PALMetadata = getTargetStreamer().getPALMetadata();
  if (!PALMetadata->setFromString(String))
    return Error(getLoc(), "invalid PAL metadata");
  // ...
}

bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  // ...
    return Error(getLoc(),
                 // ...
                 "not available on non-amdpal OSes")).str());

  auto *PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  // ...
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      // ...
    }
    // ...
      return TokError(Twine("expected an even number of values in ") +
                      // ...
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      // ...
    }
    PALMetadata->setRegister(Key, Value);
  // ...
}

bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
    return true;

  // ...
  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");

  // ...
  if (getParser().parseComma())
    return true;

  // ...
  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return true;
  // ...
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  int64_t Alignment = 4;
  // ...
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Alignment))
      return true;
    // ...
      return Error(AlignLoc, "alignment must be a power of two");
    // ...
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  // ...

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  return false;
}
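// Directive dispatch: ParseDirective() routes .amdhsa_kernel,
// .amd_kernel_code_t, HSA/PAL metadata and the other AMDGPU-specific
// directives to the handlers defined above.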
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  // ...
  if (IDVal == ".amdhsa_kernel")
    return ParseDirectiveAMDHSAKernel();

  if (IDVal == ".amdhsa_code_object_version")
    return ParseDirectiveAMDHSACodeObjectVersion();

  // ...
    return ParseDirectiveHSAMetadata();
  // ...
  if (IDVal == ".amd_kernel_code_t")
    return ParseDirectiveAMDKernelCodeT();

  if (IDVal == ".amdgpu_hsa_kernel")
    return ParseDirectiveAMDGPUHsaKernel();

  if (IDVal == ".amd_amdgpu_isa")
    return ParseDirectiveISAVersion();

  // ...
                   Twine(" directive is "
                         "not available on non-amdhsa OSes"))
  // ...

  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  // ...
    return ParseDirectivePALMetadataBegin();
  // ...
    return ParseDirectivePALMetadata();
  // ...
}

bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           MCRegister Reg) {
  if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
    // ...
  if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
    return hasSGPR104_SGPR105();
  // ...
  case SRC_SHARED_BASE_LO:
  case SRC_SHARED_BASE:
  case SRC_SHARED_LIMIT_LO:
  case SRC_SHARED_LIMIT:
  case SRC_PRIVATE_BASE_LO:
  case SRC_PRIVATE_BASE:
  case SRC_PRIVATE_LIMIT_LO:
  case SRC_PRIVATE_LIMIT:
    // ...
  case SRC_FLAT_SCRATCH_BASE_LO:
  case SRC_FLAT_SCRATCH_BASE_HI:
    return hasGloballyAddressableScratch();
  case SRC_POPS_EXITING_WAVE_ID:
    // ...
    return (isVI() || isGFX9()) &&
           getTargetStreamer().getTargetID()->isXnackSupported();
  // ...
  if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
    return hasSGPR102_SGPR103();
  // ...
}
// ...
  Res = MatchOperandParserImpl(Operands, Mnemonic);
  // ...

  SMLoc LBraceLoc = getLoc();
  // ...
    auto Loc = getLoc();
    // ...
      Error(Loc, "expected a register");
      // ...
  RBraceLoc = getLoc();
  // ...
                 "expected a comma or a closing square bracket"))
  // ...
  if (Operands.size() - Prefix > 1) {
    // ...
                    AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
    Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
  }
  // ...

// ...
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.consume_back("_e64_dpp")) {
    // ...
    setForcedEncodingSize(64);
    // ...
  }
  if (Name.consume_back("_e64")) {
    setForcedEncodingSize(64);
    // ...
  }
  if (Name.consume_back("_e32")) {
    setForcedEncodingSize(32);
    // ...
  }
  if (Name.consume_back("_dpp")) {
    // ...
  }
  if (Name.consume_back("_sdwa")) {
    setForcedSDWA(true);
    // ...
  }
  // ...

// ...
                                     unsigned VariantID);

// ...
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.starts_with("image_");
  // ...
    OperandMode Mode = OperandMode_Default;
    // ...
      Mode = OperandMode_NSA;
    // ...
      checkUnsupportedInstruction(Name, NameLoc);
      if (!Parser.hasPendingError()) {
        // ...
                        : "not a valid operand.";
        Error(getLoc(), Msg);
      }
  // ...

// ...
  if (!trySkipId(Name))
    // ...
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
  // ...
7008ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
const char *Prefix,
7019 std::function<
bool(int64_t &)> ConvertResult) {
7027 if (ConvertResult && !ConvertResult(
Value)) {
7031 Operands.push_back(AMDGPUOperand::CreateImm(
this,
Value, S, ImmTy));
7035ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7037 bool (*ConvertResult)(int64_t &)) {
7046 const unsigned MaxSize = 4;
7050 for (
int I = 0; ; ++
I) {
7052 SMLoc Loc = getLoc();
7056 if (
Op != 0 &&
Op != 1)
7064 if (
I + 1 == MaxSize)
7065 return Error(getLoc(),
"expected a closing square bracket");
7071 Operands.push_back(AMDGPUOperand::CreateImm(
this, Val, S, ImmTy));
7077 AMDGPUOperand::ImmTy ImmTy) {
7081 if (trySkipId(
Name)) {
7083 }
else if (trySkipId(
"no",
Name)) {
7090 return Error(S,
"r128 modifier is not supported on this GPU");
7092 return Error(S,
"a16 modifier is not supported on this GPU");
7094 if (
isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7095 ImmTy = AMDGPUOperand::ImmTyR128A16;
7097 Operands.push_back(AMDGPUOperand::CreateImm(
this, Bit, S, ImmTy));
7102 bool &Disabling)
const {
7103 Disabling =
Id.consume_front(
"no");
7123 SMLoc StringLoc = getLoc();
7125 int64_t CPolVal = 0;
7145 ResScope = parseScope(
Operands, Scope);
7158 if (trySkipId(
"nv")) {
7162 }
else if (trySkipId(
"no",
"nv")) {
7169 if (trySkipId(
"scale_offset")) {
7173 }
else if (trySkipId(
"no",
"scale_offset")) {
7186 Operands.push_back(AMDGPUOperand::CreateImm(
this, CPolVal, StringLoc,
7187 AMDGPUOperand::ImmTyCPol));
7192   SMLoc OpLoc = getLoc();
7193   unsigned Enabled = 0, Seen = 0;
7197     unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7204       return Error(S, "dlc modifier is not supported on this GPU");
7207       return Error(S, "scc modifier is not supported on this GPU");
7210       return Error(S, "duplicate cache policy modifier");
7222       AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7232 Operands,
"scope", {
"SCOPE_CU",
"SCOPE_SE",
"SCOPE_DEV",
"SCOPE_SYS"},
7250 if (
Value ==
"TH_DEFAULT")
7252 else if (
Value ==
"TH_STORE_LU" ||
Value ==
"TH_LOAD_WB" ||
7253 Value ==
"TH_LOAD_NT_WB") {
7254 return Error(StringLoc,
"invalid th value");
7255 }
else if (
Value.consume_front(
"TH_ATOMIC_")) {
7257 }
else if (
Value.consume_front(
"TH_LOAD_")) {
7259 }
else if (
Value.consume_front(
"TH_STORE_")) {
7262 return Error(StringLoc,
"invalid th value");
7265 if (
Value ==
"BYPASS")
7296 if (TH == 0xffffffff)
7297 return Error(StringLoc,
"invalid th value");
7304 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7305 AMDGPUOperand::ImmTy ImmT, int64_t
Default = 0,
7306 std::optional<unsigned> InsertAt = std::nullopt) {
7307 auto i = OptionalIdx.find(ImmT);
7308 if (i != OptionalIdx.end()) {
7309 unsigned Idx = i->second;
7310 const AMDGPUOperand &
Op =
7311 static_cast<const AMDGPUOperand &
>(*
Operands[
Idx]);
7315 Op.addImmOperands(Inst, 1);
7317 if (InsertAt.has_value())
7330 StringLoc = getLoc();
7335ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7341 SMLoc StringLoc = getLoc();
7345 Value = getTokenStr();
7349 if (
Value == Ids[IntVal])
7354 if (IntVal < 0 || IntVal >= (int64_t)Ids.
size())
7360ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7362 AMDGPUOperand::ImmTy
Type) {
7368 Operands.push_back(AMDGPUOperand::CreateImm(
this, IntVal, S,
Type));
7377bool AMDGPUAsmParser::tryParseFmt(
const char *Pref,
7381 SMLoc Loc = getLoc();
7383 auto Res = parseIntWithPrefix(Pref, Val);
7389 if (Val < 0 || Val > MaxVal) {
7399 AMDGPUOperand::ImmTy ImmTy) {
7400 const char *Pref =
"index_key";
7402 SMLoc Loc = getLoc();
7403 auto Res = parseIntWithPrefix(Pref, ImmVal);
7407 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7408 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7409 (ImmVal < 0 || ImmVal > 1))
7412 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7415 Operands.push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, ImmTy));
7420 return tryParseIndexKey(
Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7424 return tryParseIndexKey(
Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7428 return tryParseIndexKey(
Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7433 AMDGPUOperand::ImmTy
Type) {
7435 {
"MATRIX_FMT_FP8",
"MATRIX_FMT_BF8",
7436 "MATRIX_FMT_FP6",
"MATRIX_FMT_BF6",
7442 return tryParseMatrixFMT(
Operands,
"matrix_a_fmt",
7443 AMDGPUOperand::ImmTyMatrixAFMT);
7447 return tryParseMatrixFMT(
Operands,
"matrix_b_fmt",
7448 AMDGPUOperand::ImmTyMatrixBFMT);
7453 AMDGPUOperand::ImmTy
Type) {
7454 return parseStringOrIntWithPrefix(
7459 return tryParseMatrixScale(
Operands,
"matrix_a_scale",
7460 AMDGPUOperand::ImmTyMatrixAScale);
7464 return tryParseMatrixScale(
Operands,
"matrix_b_scale",
7465 AMDGPUOperand::ImmTyMatrixBScale);
7470 AMDGPUOperand::ImmTy
Type) {
7471 return parseStringOrIntWithPrefix(
7473 {
"MATRIX_SCALE_FMT_E8",
"MATRIX_SCALE_FMT_E5M3",
"MATRIX_SCALE_FMT_E4M3"},
7478 return tryParseMatrixScaleFmt(
Operands,
"matrix_a_scale_fmt",
7479 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7483 return tryParseMatrixScaleFmt(
Operands,
"matrix_b_scale_fmt",
7484 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7489 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7496   for (int I = 0; I < 2; ++I) {
7497     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7500     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7505   if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7511   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7514   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7515   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
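// A minimal sketch of how the two fields are combined once the defaults
// above are filled in, assuming the pre-GFX10 MTBUF layout (dfmt in bits
// [3:0], nfmt in bits [6:4]); the parser uses AMDGPU::encodeDfmtNfmt for
// the real packing.
static unsigned encodeDfmtNfmtSketch(unsigned Dfmt, unsigned Nfmt) {
  return (Dfmt & 0xF) | ((Nfmt & 0x7) << 4);
}
// format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT] would pack dfmt = 4
// and nfmt = 7 into 0x74 under this assumed layout.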
7521ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7526 if (!tryParseFmt(
"format", UFMT_MAX, Fmt))
7529 if (Fmt == UFMT_UNDEF)
7536bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7544 if (Format != DFMT_UNDEF) {
7550 if (Format != NFMT_UNDEF) {
7555 Error(Loc,
"unsupported format");
7566 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7571 SMLoc Loc = getLoc();
7572 if (!parseId(Str,
"expected a format string") ||
7573 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7575 if (Dfmt == DFMT_UNDEF)
7576 return Error(Loc,
"duplicate numeric format");
7577 if (Nfmt == NFMT_UNDEF)
7578 return Error(Loc,
"duplicate data format");
7581 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7582 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7586 if (Ufmt == UFMT_UNDEF)
7587 return Error(FormatLoc,
"unsupported format");
7602 if (Id == UFMT_UNDEF)
7606 return Error(Loc,
"unified format is not supported on this GPU");
7612ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7614 SMLoc Loc = getLoc();
7619 return Error(Loc,
"out of range format");
7624ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7632 SMLoc Loc = getLoc();
7633 if (!parseId(FormatStr,
"expected a format string"))
7636 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7638 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7648 return parseNumericFormat(Format);
7656 SMLoc Loc = getLoc();
7666 AMDGPUOperand::CreateImm(
this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7685 Res = parseSymbolicOrNumericFormat(Format);
7690 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands[
Size - 2]);
7691 assert(
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7698 return Error(getLoc(),
"duplicate format");
7704 parseIntWithPrefix(
"offset",
Operands, AMDGPUOperand::ImmTyOffset);
7706 Res = parseIntWithPrefix(
"inst_offset",
Operands,
7707 AMDGPUOperand::ImmTyInstOffset);
7714 parseNamedBit(
"r128",
Operands, AMDGPUOperand::ImmTyR128A16);
7716 Res = parseNamedBit(
"a16",
Operands, AMDGPUOperand::ImmTyA16);
7722 parseIntWithPrefix(
"blgp",
Operands, AMDGPUOperand::ImmTyBLGP);
7725 parseOperandArrayWithPrefix(
"neg",
Operands, AMDGPUOperand::ImmTyBLGP);
7735   OptionalImmIndexMap OptionalIdx;
7737   unsigned OperandIdx[4];
7738   unsigned EnMask = 0;
7741   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7742     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7747       OperandIdx[SrcIdx] = Inst.size();
7748       Op.addRegOperands(Inst, 1);
7755       OperandIdx[SrcIdx] = Inst.size();
7761     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7762       Op.addImmOperands(Inst, 1);
7766     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7770     OptionalIdx[Op.getImmTy()] = i;
7776   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7783   for (auto i = 0; i < SrcIdx; ++i) {
7785     EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
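// A small sketch of the export enable-mask computation performed by the
// loop above: with "compr" each parsed source covers a pair of channels
// (two mask bits), otherwise one bit per source.
static unsigned expEnableMaskSketch(unsigned NumParsedSrcs, bool Compr) {
  unsigned EnMask = 0;
  for (unsigned i = 0; i < NumParsedSrcs; ++i)
    EnMask |= Compr ? (0x3u << (i * 2)) : (0x1u << i);
  return EnMask;   // two sources: 0xF when compressed, 0x3 otherwise
}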
7810   IntVal = encode(ISA, IntVal, CntVal);
7811   if (CntVal != decode(ISA, IntVal)) {
7813     IntVal = encode(ISA, IntVal, -1);
7821 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7823   SMLoc CntLoc = getLoc();
7831   SMLoc ValLoc = getLoc();
7840   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7842   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7844   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7847     Error(CntLoc, "invalid counter name " + CntName);
7852     Error(ValLoc, "too large value for " + CntName);
7861     Error(getLoc(), "expected a counter name");
7888 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7889   SMLoc FieldLoc = getLoc();
7895   SMLoc ValueLoc = getLoc();
7902   if (FieldName == "instid0") {
7904   } else if (FieldName == "instskip") {
7906   } else if (FieldName == "instid1") {
7909     Error(FieldLoc, "invalid field name " + FieldName);
7928       .Case("VALU_DEP_1", 1)
7929       .Case("VALU_DEP_2", 2)
7930       .Case("VALU_DEP_3", 3)
7931       .Case("VALU_DEP_4", 4)
7932       .Case("TRANS32_DEP_1", 5)
7933       .Case("TRANS32_DEP_2", 6)
7934       .Case("TRANS32_DEP_3", 7)
7935       .Case("FMA_ACCUM_CYCLE_1", 8)
7936       .Case("SALU_CYCLE_1", 9)
7937       .Case("SALU_CYCLE_2", 10)
7938       .Case("SALU_CYCLE_3", 11)
7946   Delay |= Value << Shift;
7956   if (!parseDelay(Delay))
7964   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
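// A sketch of how the three named fields end up in the s_delay_alu
// immediate via "Delay |= Value << Shift". The shifts assumed here
// (instid0 in bits [3:0], instskip in [6:4], instid1 in [10:7]) follow
// the GFX11 layout.
static unsigned buildDelaySketch(unsigned InstId0, unsigned InstSkip,
                                 unsigned InstId1) {
  return (InstId0 & 0xF) | ((InstSkip & 0x7) << 4) | ((InstId1 & 0xF) << 7);
}
// s_delay_alu instid0(VALU_DEP_1) | instid1(VALU_DEP_2) would then encode
// as buildDelaySketch(1, 0, 2), using the VALU_DEP_* values listed above.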
7969AMDGPUOperand::isSWaitCnt()
const {
7973bool AMDGPUOperand::isSDelayALU()
const {
return isImm(); }
7979void AMDGPUAsmParser::depCtrError(
SMLoc Loc,
int ErrorId,
7983 Error(Loc,
Twine(
"invalid counter name ", DepCtrName));
7986 Error(Loc,
Twine(DepCtrName,
" is not supported on this GPU"));
7989 Error(Loc,
Twine(
"duplicate counter name ", DepCtrName));
7992 Error(Loc,
Twine(
"invalid value for ", DepCtrName));
7999bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr,
unsigned &UsedOprMask) {
8003 SMLoc DepCtrLoc = getLoc();
8014 unsigned PrevOprMask = UsedOprMask;
8015 int CntVal =
encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
8018 depCtrError(DepCtrLoc, CntVal, DepCtrName);
8027 Error(getLoc(),
"expected a counter name");
8032 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8033 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8041 SMLoc Loc = getLoc();
8044 unsigned UsedOprMask = 0;
8046 if (!parseDepCtr(DepCtr, UsedOprMask))
8054 Operands.push_back(AMDGPUOperand::CreateImm(
this, DepCtr, Loc));
8058bool AMDGPUOperand::isDepCtr()
const {
return isS16Imm(); }
8064 ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8066                                             OperandInfoTy &Width) {
8073   HwReg.Loc = getLoc();
8076     HwReg.IsSymbolic = true;
8078   } else if (!parseExpr(HwReg.Val, "a register name")) {
8086   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8096   Width.Loc = getLoc();
8108   SMLoc Loc = getLoc();
8110   StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8112   StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8113                            HwregOffset::Default);
8114   struct : StructuredOpField {
8115     using StructuredOpField::StructuredOpField;
8116     bool validate(AMDGPUAsmParser &Parser) const override {
8118       return Error(Parser, "only values from 1 to 32 are legal");
8121   } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8125     Res = parseHwregFunc(HwReg, Offset, Width);
8128     if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8130     ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8134              parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8140   if (!isUInt<16>(ImmVal))
8141     return Error(Loc, "invalid immediate: only 16-bit values are legal");
8143       AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8147 bool AMDGPUOperand::isHwreg() const {
8148   return isImmTy(ImmTyHwreg);
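// A sketch of the immediate HwregEncoding::encode builds for s_getreg/
// s_setreg: register id in bits [5:0], bit offset in [10:6], and
// (width - 1) in [15:11], which is why the anonymous validator above
// accepts only sizes from 1 to 32. The field positions are stated here
// as an assumption mirroring the structured form hwreg(id, offset, size).
static unsigned encodeHwregSketch(unsigned Id, unsigned Offset, unsigned Size) {
  return (Id & 0x3F) | ((Offset & 0x1F) << 6) | (((Size - 1) & 0x1F) << 11);
}
// hwreg(id, 0, 32) therefore sets the top five bits: Id | (31 << 11).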
8156 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8158                                   OperandInfoTy &Stream) {
8164     Msg.IsSymbolic = true;
8166   } else if (!parseExpr(Msg.Val, "a message name")) {
8171     Op.IsDefined = true;
8174         (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8177     } else if (!parseExpr(Op.Val, "an operation name")) {
8182       Stream.IsDefined = true;
8183       Stream.Loc = getLoc();
8193 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8194                                  const OperandInfoTy &Op,
8195                                  const OperandInfoTy &Stream) {
8201   bool Strict = Msg.IsSymbolic;
8205     Error(Msg.Loc, "specified message id is not supported on this GPU");
8210     Error(Msg.Loc, "invalid message id");
8216       Error(Op.Loc, "message does not support operations");
8218       Error(Msg.Loc, "missing message operation");
8224       Error(Op.Loc, "specified operation id is not supported on this GPU");
8226       Error(Op.Loc, "invalid operation id");
8231       Error(Stream.Loc, "message operation does not support streams");
8235       Error(Stream.Loc, "invalid message stream id");
8245   SMLoc Loc = getLoc();
8249   OperandInfoTy Op(OP_NONE_);
8250   OperandInfoTy Stream(STREAM_ID_NONE_);
8251   if (parseSendMsgBody(Msg, Op, Stream) &&
8252       validateSendMsg(Msg, Op, Stream)) {
8257   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8258     if (ImmVal < 0 || !isUInt<16>(ImmVal))
8259       return Error(Loc, "invalid immediate: only 16-bit values are legal");
8264   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8268 bool AMDGPUOperand::isSendMsg() const {
8269   return isImmTy(ImmTySendMsg);
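// A sketch of the 16-bit immediate produced from sendmsg(msg, op, stream)
// once parseSendMsgBody/validateSendMsg accept the fields. The layout
// assumed here is the pre-GFX11 one (message id in bits [3:0], operation
// in [6:4], stream id in [9:8]); newer targets drop the extra fields.
static unsigned encodeSendMsgSketch(unsigned MsgId, unsigned OpId,
                                    unsigned StreamId) {
  return (MsgId & 0xF) | ((OpId & 0x7) << 4) | ((StreamId & 0x3) << 8);
}
// sendmsg(MSG_GS, GS_OP_EMIT, 0) would encode as (2 | (2 << 4)) == 0x22
// under this assumed layout.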
8290 return Error(S,
"invalid interpolation slot");
8292 Operands.push_back(AMDGPUOperand::CreateImm(
this, Slot, S,
8293 AMDGPUOperand::ImmTyInterpSlot));
8304 if (!Str.starts_with(
"attr"))
8305 return Error(S,
"invalid interpolation attribute");
8315 return Error(S,
"invalid or missing interpolation attribute channel");
8317 Str = Str.drop_back(2).drop_front(4);
8320 if (Str.getAsInteger(10, Attr))
8321 return Error(S,
"invalid or missing interpolation attribute number");
8324 return Error(S,
"out of bounds interpolation attribute number");
8328 Operands.push_back(AMDGPUOperand::CreateImm(
this, Attr, S,
8329 AMDGPUOperand::ImmTyInterpAttr));
8330 Operands.push_back(AMDGPUOperand::CreateImm(
8331 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8350 return Error(S, (
Id == ET_INVALID)
8351 ?
"invalid exp target"
8352 :
"exp target is not supported on this GPU");
8354 Operands.push_back(AMDGPUOperand::CreateImm(
this,
Id, S,
8355 AMDGPUOperand::ImmTyExpTgt));
8370 return isId(getToken(),
Id);
8375 return getTokenKind() ==
Kind;
8378StringRef AMDGPUAsmParser::getId()
const {
8405 if (isId(
Id) && peekToken().is(Kind)) {
8415 if (isToken(Kind)) {
8425 if (!trySkipToken(Kind)) {
8426 Error(getLoc(), ErrMsg);
8437 if (Parser.parseExpression(Expr))
8440 if (Expr->evaluateAsAbsolute(
Imm))
8444 Error(S,
"expected absolute expression");
8447 Twine(
" or an absolute expression"));
8457 if (Parser.parseExpression(Expr))
8461 if (Expr->evaluateAsAbsolute(IntVal)) {
8462 Operands.push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
8464 Operands.push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
8472 Val = getToken().getStringContents();
8476 Error(getLoc(), ErrMsg);
8483 Val = getTokenStr();
8487 if (!ErrMsg.
empty())
8488 Error(getLoc(), ErrMsg);
8493AMDGPUAsmParser::getToken()
const {
8494 return Parser.getTok();
8497AsmToken AMDGPUAsmParser::peekToken(
bool ShouldSkipSpace) {
8500 : getLexer().peekTok(ShouldSkipSpace);
8505 auto TokCount = getLexer().peekTokens(Tokens);
8512AMDGPUAsmParser::getTokenKind()
const {
8517AMDGPUAsmParser::getLoc()
const {
8518 return getToken().getLoc();
8522AMDGPUAsmParser::getTokenStr()
const {
8523 return getToken().getString();
8527AMDGPUAsmParser::lex() {
8532 return ((AMDGPUOperand &)*
Operands[0]).getStartLoc();
8536AMDGPUAsmParser::getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
8538 for (
unsigned i =
Operands.size() - 1; i > 0; --i) {
8539 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
8541 return Op.getStartLoc();
8547AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy
Type,
8549 auto Test = [=](
const AMDGPUOperand&
Op) {
return Op.isImmTy(
Type); };
8555 auto Test = [=](
const AMDGPUOperand&
Op) {
8556 return Op.isRegKind() &&
Op.getReg() ==
Reg;
8562 bool SearchMandatoryLiterals)
const {
8563 auto Test = [](
const AMDGPUOperand&
Op) {
8564 return Op.IsImmKindLiteral() ||
Op.isExpr();
8567 if (SearchMandatoryLiterals && Loc == getInstLoc(
Operands))
8568 Loc = getMandatoryLitLoc(
Operands);
8573 auto Test = [](
const AMDGPUOperand &
Op) {
8574 return Op.IsImmKindMandatoryLiteral();
8581 auto Test = [](
const AMDGPUOperand&
Op) {
8582 return Op.isImmKindConst();
8599 SMLoc IdLoc = getLoc();
8605 find_if(Fields, [
Id](StructuredOpField *
F) {
return F->Id ==
Id; });
8606 if (
I == Fields.
end())
8607 return Error(IdLoc,
"unknown field");
8608 if ((*I)->IsDefined)
8609 return Error(IdLoc,
"duplicate field");
8612 (*I)->Loc = getLoc();
8615 (*I)->IsDefined =
true;
8622bool AMDGPUAsmParser::validateStructuredOpFields(
8624 return all_of(Fields, [
this](
const StructuredOpField *
F) {
8625 return F->validate(*
this);
8636 const unsigned OrMask,
8637 const unsigned XorMask) {
8640 return BITMASK_PERM_ENC |
8641 (AndMask << BITMASK_AND_SHIFT) |
8642 (OrMask << BITMASK_OR_SHIFT) |
8643 (XorMask << BITMASK_XOR_SHIFT);
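// A sketch of what the 5-character control string of swizzle(BITMASK_PERM,
// "...") contributes to encodeBitmaskPerm() above. Per lane-id bit the
// hardware computes new_bit = ((old_bit & and) | or) ^ xor, so every
// character selects one (and, or, xor) triple; the field shifts used below
// (and in [4:0], or in [9:5], xor in [14:10]) are assumed from the
// ds_swizzle offset layout.
static bool encodeBitmaskCtlSketch(const char Ctl[5], unsigned &Imm) {
  unsigned And = 0, Or = 0, Xor = 0;
  for (int i = 0; i < 5; ++i) {
    unsigned Bit = 1u << (4 - i);                  // leftmost character = MSB
    switch (Ctl[i]) {
    case 'p': And |= Bit; break;                   // pass the bit through
    case 'i': And |= Bit; Xor |= Bit; break;       // invert the bit
    case '0': break;                               // force the bit to 0
    case '1': Or |= Bit; break;                    // force the bit to 1
    default: return false;                         // "invalid mask"
    }
  }
  Imm = And | (Or << 5) | (Xor << 10);             // BITMASK_PERM_ENC assumed 0
  return true;
}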
8646bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &
Op,
const unsigned MinVal,
8647 const unsigned MaxVal,
8656 if (Op < MinVal || Op > MaxVal) {
8665AMDGPUAsmParser::parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
8666 const unsigned MinVal,
8667 const unsigned MaxVal,
8670 for (
unsigned i = 0; i < OpNum; ++i) {
8671 if (!parseSwizzleOperand(
Op[i], MinVal, MaxVal, ErrMsg, Loc))
8679AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &
Imm) {
8683 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8684 "expected a 2-bit lane id")) {
8695AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &
Imm) {
8702 if (!parseSwizzleOperand(GroupSize,
8704 "group size must be in the interval [2,32]",
8709 Error(Loc,
"group size must be a power of two");
8712 if (parseSwizzleOperand(LaneIdx,
8714 "lane id must be in the interval [0,group size - 1]",
8723AMDGPUAsmParser::parseSwizzleReverse(int64_t &
Imm) {
8729 if (!parseSwizzleOperand(GroupSize,
8731 "group size must be in the interval [2,32]",
8736 Error(Loc,
"group size must be a power of two");
8745AMDGPUAsmParser::parseSwizzleSwap(int64_t &
Imm) {
8751 if (!parseSwizzleOperand(GroupSize,
8753 "group size must be in the interval [1,16]",
8758 Error(Loc,
"group size must be a power of two");
8767AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &
Imm) {
8775 SMLoc StrLoc = getLoc();
8776 if (!parseString(Ctl)) {
8779 if (Ctl.
size() != BITMASK_WIDTH) {
8780 Error(StrLoc,
"expected a 5-character mask");
8784 unsigned AndMask = 0;
8785 unsigned OrMask = 0;
8786 unsigned XorMask = 0;
8788 for (
size_t i = 0; i < Ctl.
size(); ++i) {
8792 Error(StrLoc,
"invalid mask");
8813bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &
Imm) {
8817 Error(getLoc(),
"FFT mode swizzle not supported on this GPU");
8823 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8824 "FFT swizzle must be in the interval [0," +
8833bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &
Imm) {
8837 Error(getLoc(),
"Rotate mode swizzle not supported on this GPU");
8844 if (!parseSwizzleOperand(
Direction, 0, 1,
8845 "direction must be 0 (left) or 1 (right)", Loc))
8849 if (!parseSwizzleOperand(
8850 RotateSize, 0, ROTATE_MAX_SIZE,
8851 "number of threads to rotate must be in the interval [0," +
8857 (RotateSize << ROTATE_SIZE_SHIFT);
8862 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8864   SMLoc OffsetLoc = getLoc();
8869   if (!isUInt<16>(Imm)) {
8870     Error(OffsetLoc, "expected a 16-bit offset");
8877 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8882   SMLoc ModeLoc = getLoc();
8885     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8886       Ok = parseSwizzleQuadPerm(Imm);
8887     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8888       Ok = parseSwizzleBitmaskPerm(Imm);
8889     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8890       Ok = parseSwizzleBroadcast(Imm);
8891     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8892       Ok = parseSwizzleSwap(Imm);
8893     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8894       Ok = parseSwizzleReverse(Imm);
8895     } else if (trySkipId(IdSymbolic[ID_FFT])) {
8896       Ok = parseSwizzleFFT(Imm);
8897     } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8898       Ok = parseSwizzleRotate(Imm);
8900       Error(ModeLoc, "expected a swizzle mode");
8903   return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
8913   if (trySkipId("offset")) {
8917     if (trySkipId("swizzle")) {
8918       Ok = parseSwizzleMacro(Imm);
8920       Ok = parseSwizzleOffset(Imm);
8924     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8932 AMDGPUOperand::isSwizzle() const {
8933   return isImmTy(ImmTySwizzle);
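// A sketch of the swizzle(QUAD_PERM, a, b, c, d) immediate built by
// parseSwizzleQuadPerm: four 2-bit lane selectors packed two bits apart,
// plus the quad-perm mode bit (QUAD_PERM_ENC, assumed here to be 0x8000).
static unsigned encodeQuadPermSketch(const unsigned Lane[4]) {
  unsigned Imm = 0x8000;                           // assumed QUAD_PERM_ENC
  for (unsigned i = 0; i < 4; ++i)
    Imm |= (Lane[i] & 0x3) << (i * 2);             // "expected a 2-bit lane id"
  return Imm;
}
// swizzle(QUAD_PERM, 0, 0, 0, 0) broadcasts lane 0 of every quad and
// would encode as 0x8000 under this assumption.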
8940int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8954 for (
unsigned ModeId = ID_MIN; ModeId <=
ID_MAX; ++ModeId) {
8955 if (trySkipId(IdSymbolic[ModeId])) {
8963 "expected a VGPR index mode or a closing parenthesis" :
8964 "expected a VGPR index mode");
8969 Error(S,
"duplicate VGPR index mode");
8977 "expected a comma or a closing parenthesis"))
8992 Imm = parseGPRIdxMacro();
8996 if (getParser().parseAbsoluteExpression(
Imm))
8998 if (
Imm < 0 || !isUInt<4>(
Imm))
8999 return Error(S,
"invalid immediate: only 4-bit values are legal");
9003 AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
9007bool AMDGPUOperand::isGPRIdxMode()
const {
9008 return isImmTy(ImmTyGprIdxMode);
9020 if (isRegister() || isModifier())
9027 assert(Opr.isImm() || Opr.isExpr());
9028 SMLoc Loc = Opr.getStartLoc();
9032 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9033 Error(Loc,
"expected an absolute expression or a label");
9034 }
else if (Opr.isImm() && !Opr.isS16Imm()) {
9035 Error(Loc,
"expected a 16-bit signed jump offset");
9053void AMDGPUAsmParser::cvtMubufImpl(
MCInst &Inst,
9056 OptionalImmIndexMap OptionalIdx;
9057 unsigned FirstOperandIdx = 1;
9058 bool IsAtomicReturn =
false;
9065 for (
unsigned i = FirstOperandIdx, e =
Operands.size(); i != e; ++i) {
9066 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
9070 Op.addRegOperands(Inst, 1);
9074 if (IsAtomicReturn && i == FirstOperandIdx)
9075 Op.addRegOperands(Inst, 1);
9080 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9081 Op.addImmOperands(Inst, 1);
9093 OptionalIdx[
Op.getImmTy()] = i;
9104bool AMDGPUOperand::isSMRDOffset8()
const {
9105 return isImmLiteral() && isUInt<8>(
getImm());
9108bool AMDGPUOperand::isSMEMOffset()
const {
9110 return isImmLiteral();
9113bool AMDGPUOperand::isSMRDLiteralOffset()
const {
9116 return isImmLiteral() && !isUInt<8>(
getImm()) && isUInt<32>(
getImm());
9148bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9149 if (BoundCtrl == 0 || BoundCtrl == 1) {
9157void AMDGPUAsmParser::onBeginOfFile() {
9158 if (!getParser().getStreamer().getTargetStreamer() ||
9162 if (!getTargetStreamer().getTargetID())
9163 getTargetStreamer().initializeTargetID(getSTI(),
9164 getSTI().getFeatureString());
9167 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9175 bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9181           .Case("max", AGVK::AGVK_Max)
9182           .Case("or", AGVK::AGVK_Or)
9183           .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9184           .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9185           .Case("alignto", AGVK::AGVK_AlignTo)
9186           .Case("occupancy", AGVK::AGVK_Occupancy)
9196     if (Exprs.empty()) {
9197       Error(getToken().getLoc(),
9198             "empty " + Twine(TokenId) + " expression");
9201     if (CommaCount + 1 != Exprs.size()) {
9202       Error(getToken().getLoc(),
9203             "mismatch of commas in " + Twine(TokenId) + " expression");
9210       if (getParser().parseExpression(Expr, EndLoc))
9214       if (LastTokenWasComma)
9217       Error(getToken().getLoc(),
9218             "unexpected token in " + Twine(TokenId) + " expression");
9224   return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
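// A sketch of what two of the expression functions recognized above
// evaluate to once their operands become absolute: "max(...)" folds to
// the largest argument and "alignto(value, align)" rounds value up to a
// multiple of align. The real folding is done symbolically by
// AMDGPUMCExpr; the semantics below are stated as an assumption for
// illustration only.
#include <algorithm>
#include <cstdint>
#include <initializer_list>
static int64_t foldMaxSketch(std::initializer_list<int64_t> Args) {
  return *std::max_element(Args.begin(), Args.end());   // Args must be non-empty
}
static int64_t foldAlignToSketch(int64_t Value, int64_t Align) {
  return ((Value + Align - 1) / Align) * Align;          // Align must be positive
}
// ".set x, max(a, b)" then behaves like foldMaxSketch({a, b}) once both
// symbols resolve.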
9229 if (
Name ==
"mul") {
9230 return parseIntWithPrefix(
"mul",
Operands,
9234 if (
Name ==
"div") {
9235 return parseIntWithPrefix(
"div",
Operands,
9246 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9251 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9252 AMDGPU::OpName::src2};
9260 int DstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
9265 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0_modifiers);
9267 if (
DstOp.isReg() &&
9268 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(
DstOp.
getReg())) {
9272 if ((OpSel & (1 << SrcNum)) != 0)
9278void AMDGPUAsmParser::cvtVOP3OpSel(
MCInst &Inst,
9285 OptionalImmIndexMap &OptionalIdx) {
9286 cvtVOP3P(Inst,
Operands, OptionalIdx);
9295 &&
Desc.NumOperands > (OpNum + 1)
9297 &&
Desc.operands()[OpNum + 1].RegClass != -1
9299 &&
Desc.getOperandConstraint(OpNum + 1,
9300 MCOI::OperandConstraint::TIED_TO) == -1;
9305 OptionalImmIndexMap OptionalIdx;
9310 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9311 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9314 for (
unsigned E =
Operands.size();
I != E; ++
I) {
9315 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9317 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9318 }
else if (
Op.isInterpSlot() ||
Op.isInterpAttr() ||
9319 Op.isInterpAttrChan()) {
9321 }
else if (
Op.isImmModifier()) {
9322 OptionalIdx[
Op.getImmTy()] =
I;
9330 AMDGPUOperand::ImmTyHigh);
9334 AMDGPUOperand::ImmTyClamp);
9338 AMDGPUOperand::ImmTyOModSI);
9343 OptionalImmIndexMap OptionalIdx;
9348 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9349 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9352 for (
unsigned E =
Operands.size();
I != E; ++
I) {
9353 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9355 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9356 }
else if (
Op.isImmModifier()) {
9357 OptionalIdx[
Op.getImmTy()] =
I;
9365 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9374 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9375 AMDGPU::OpName::src2};
9376 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9377 AMDGPU::OpName::src1_modifiers,
9378 AMDGPU::OpName::src2_modifiers};
9382 for (
int J = 0; J < 3; ++J) {
9383 int OpIdx = AMDGPU::getNamedOperandIdx(
Opc, Ops[J]);
9387 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9390 if ((OpSel & (1 << J)) != 0)
9392 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
9393 (OpSel & (1 << 3)) != 0)
9399void AMDGPUAsmParser::cvtScaledMFMA(
MCInst &Inst,
9401 OptionalImmIndexMap OptionalIdx;
9404 int CbszOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::cbsz);
9408 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J)
9409 static_cast<AMDGPUOperand &
>(*
Operands[
I++]).addRegOperands(Inst, 1);
9411 for (
unsigned E =
Operands.size();
I != E; ++
I) {
9412 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands[
I]);
9417 if (NumOperands == CbszOpIdx) {
9422 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9423 }
else if (
Op.isImmModifier()) {
9424 OptionalIdx[
Op.getImmTy()] =
I;
9426 Op.addRegOrImmOperands(Inst, 1);
9431 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9432 if (CbszIdx != OptionalIdx.end()) {
9433 int CbszVal = ((AMDGPUOperand &)*
Operands[CbszIdx->second]).getImm();
9437 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
9438 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9439 if (BlgpIdx != OptionalIdx.end()) {
9440 int BlgpVal = ((AMDGPUOperand &)*
Operands[BlgpIdx->second]).getImm();
9451 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9452 if (OpselIdx != OptionalIdx.end()) {
9453 OpSel =
static_cast<const AMDGPUOperand &
>(*
Operands[OpselIdx->second])
9457 unsigned OpSelHi = 0;
9458 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9459 if (OpselHiIdx != OptionalIdx.end()) {
9460 OpSelHi =
static_cast<const AMDGPUOperand &
>(*
Operands[OpselHiIdx->second])
9463 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9464 AMDGPU::OpName::src1_modifiers};
9466 for (
unsigned J = 0; J < 2; ++J) {
9467 unsigned ModVal = 0;
9468 if (OpSel & (1 << J))
9470 if (OpSelHi & (1 << J))
9473 const int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9479 OptionalImmIndexMap &OptionalIdx) {
9484 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9485 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9488 for (
unsigned E =
Operands.size();
I != E; ++
I) {
9489 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9491 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9492 }
else if (
Op.isImmModifier()) {
9493 OptionalIdx[
Op.getImmTy()] =
I;
9495 Op.addRegOrImmOperands(Inst, 1);
9501 AMDGPUOperand::ImmTyScaleSel);
9505 AMDGPUOperand::ImmTyClamp);
9511 AMDGPUOperand::ImmTyByteSel);
9516 AMDGPUOperand::ImmTyOModSI);
9523 auto *it = Inst.
begin();
9524 std::advance(it, AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2_modifiers));
9533 OptionalImmIndexMap OptionalIdx;
9534 cvtVOP3(Inst,
Operands, OptionalIdx);
9538 OptionalImmIndexMap &OptIdx) {
9544 if (
Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9545 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9546 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9547 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9548 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9549 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9557 !(
Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
9558 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
9559 Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
9560 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
9561 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
9562 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
9563 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
9564 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
9565 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
9566 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9567 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
9568 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
9569 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9570 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9571 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
9572 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
9573 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
9574 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
9575 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
9576 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
9577 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
9578 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
9579 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
9580 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
9581 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
9582 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
9586 int BitOp3Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::bitop3);
9587 if (BitOp3Idx != -1) {
9594 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9595 if (OpSelIdx != -1) {
9599 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
9600 if (OpSelHiIdx != -1) {
9607 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_fmt);
9608 if (MatrixAFMTIdx != -1) {
9610 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9614 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_fmt);
9615 if (MatrixBFMTIdx != -1) {
9617 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9620 int MatrixAScaleIdx =
9621 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_scale);
9622 if (MatrixAScaleIdx != -1) {
9624 AMDGPUOperand::ImmTyMatrixAScale, 0);
9627 int MatrixBScaleIdx =
9628 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_scale);
9629 if (MatrixBScaleIdx != -1) {
9631 AMDGPUOperand::ImmTyMatrixBScale, 0);
9634 int MatrixAScaleFmtIdx =
9635 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9636 if (MatrixAScaleFmtIdx != -1) {
9638 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9641 int MatrixBScaleFmtIdx =
9642 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9643 if (MatrixBScaleFmtIdx != -1) {
9645 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9650 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9654 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9656 int NegLoIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::neg_lo);
9660 int NegHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::neg_hi);
9664 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9665 AMDGPU::OpName::src2};
9666 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9667 AMDGPU::OpName::src1_modifiers,
9668 AMDGPU::OpName::src2_modifiers};
9671 unsigned OpSelHi = 0;
9678 if (OpSelHiIdx != -1)
9687 for (
int J = 0; J < 3; ++J) {
9688 int OpIdx = AMDGPU::getNamedOperandIdx(
Opc, Ops[J]);
9692 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9700 if (
SrcOp.isReg() && getMRI()
9707 if ((OpSel & (1 << J)) != 0)
9711 if ((OpSelHi & (1 << J)) != 0)
9714 if ((NegLo & (1 << J)) != 0)
9717 if ((NegHi & (1 << J)) != 0)
9725 OptionalImmIndexMap OptIdx;
9731 unsigned i,
unsigned Opc,
9733 if (AMDGPU::getNamedOperandIdx(
Opc,
OpName) != -1)
9734 ((AMDGPUOperand &)*
Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9736 ((AMDGPUOperand &)*
Operands[i]).addRegOperands(Inst, 1);
9742 ((AMDGPUOperand &)*
Operands[1]).addRegOperands(Inst, 1);
9745 ((AMDGPUOperand &)*
Operands[1]).addRegOperands(Inst, 1);
9746 ((AMDGPUOperand &)*
Operands[4]).addRegOperands(Inst, 1);
9748 OptionalImmIndexMap OptIdx;
9749 for (
unsigned i = 5; i <
Operands.size(); ++i) {
9750 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
9751 OptIdx[
Op.getImmTy()] = i;
9756 AMDGPUOperand::ImmTyIndexKey8bit);
9760 AMDGPUOperand::ImmTyIndexKey16bit);
9764 AMDGPUOperand::ImmTyIndexKey32bit);
9784 Operands.push_back(AMDGPUOperand::CreateToken(
this,
"::", S));
9785 SMLoc OpYLoc = getLoc();
9788 Operands.push_back(AMDGPUOperand::CreateToken(
this, OpYName, OpYLoc));
9791 return Error(OpYLoc,
"expected a VOPDY instruction after ::");
9800 auto addOp = [&](
uint16_t ParsedOprIdx) {
9801 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[ParsedOprIdx]);
9803 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9807 Op.addRegOperands(Inst, 1);
9811 Op.addImmOperands(Inst, 1);
9823 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9827 const auto &CInfo = InstInfo[CompIdx];
9828 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9829 for (
unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9830 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9831 if (CInfo.hasSrc2Acc())
9832 addOp(CInfo.getIndexOfDstInParsedOperands());
9836 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::bitop3);
9837 if (BitOp3Idx != -1) {
9838 OptionalImmIndexMap OptIdx;
9839 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands.back());
9851bool AMDGPUOperand::isDPP8()
const {
9852 return isImmTy(ImmTyDPP8);
9855bool AMDGPUOperand::isDPPCtrl()
const {
9856 using namespace AMDGPU::DPP;
9858 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(
getImm());
9861 return (
Imm >= DppCtrl::QUAD_PERM_FIRST &&
Imm <= DppCtrl::QUAD_PERM_LAST) ||
9862 (
Imm >= DppCtrl::ROW_SHL_FIRST &&
Imm <= DppCtrl::ROW_SHL_LAST) ||
9863 (
Imm >= DppCtrl::ROW_SHR_FIRST &&
Imm <= DppCtrl::ROW_SHR_LAST) ||
9864 (
Imm >= DppCtrl::ROW_ROR_FIRST &&
Imm <= DppCtrl::ROW_ROR_LAST) ||
9865 (
Imm == DppCtrl::WAVE_SHL1) ||
9866 (
Imm == DppCtrl::WAVE_ROL1) ||
9867 (
Imm == DppCtrl::WAVE_SHR1) ||
9868 (
Imm == DppCtrl::WAVE_ROR1) ||
9869 (
Imm == DppCtrl::ROW_MIRROR) ||
9870 (
Imm == DppCtrl::ROW_HALF_MIRROR) ||
9871 (
Imm == DppCtrl::BCAST15) ||
9872 (
Imm == DppCtrl::BCAST31) ||
9873 (
Imm >= DppCtrl::ROW_SHARE_FIRST &&
Imm <= DppCtrl::ROW_SHARE_LAST) ||
9874 (
Imm >= DppCtrl::ROW_XMASK_FIRST &&
Imm <= DppCtrl::ROW_XMASK_LAST);
9883bool AMDGPUOperand::isBLGP()
const {
9884 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(
getImm());
9887bool AMDGPUOperand::isS16Imm()
const {
9888 return isImmLiteral() && (isInt<16>(
getImm()) || isUInt<16>(
getImm()));
9891bool AMDGPUOperand::isU16Imm()
const {
9892 return isImmLiteral() && isUInt<16>(
getImm());
9899bool AMDGPUAsmParser::parseDimId(
unsigned &Encoding) {
9904 SMLoc Loc = getToken().getEndLoc();
9905 Token = std::string(getTokenStr());
9907 if (getLoc() != Loc)
9912 if (!parseId(Suffix))
9937 SMLoc Loc = getLoc();
9938 if (!parseDimId(Encoding))
9939 return Error(Loc,
"invalid dim value");
9941 Operands.push_back(AMDGPUOperand::CreateImm(
this, Encoding, S,
9942 AMDGPUOperand::ImmTyDim));
9960   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9963   for (size_t i = 0; i < 8; ++i) {
9967     SMLoc Loc = getLoc();
9968     if (getParser().parseAbsoluteExpression(Sels[i]))
9970     if (0 > Sels[i] || 7 < Sels[i])
9971       return Error(Loc, "expected a 3-bit value");
9978   for (size_t i = 0; i < 8; ++i)
9979     DPP8 |= (Sels[i] << (i * 3));
9981   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
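// A sketch of the dpp8:[...] operand assembled by the loop above: eight
// 3-bit selectors, where selector i names the lane that feeds lane i of
// each 8-lane group, packed three bits apart into a 24-bit immediate.
static unsigned encodeDpp8Sketch(const unsigned Sel[8]) {
  unsigned DPP8 = 0;
  for (unsigned i = 0; i < 8; ++i)
    DPP8 |= (Sel[i] & 0x7) << (i * 3);             // "expected a 3-bit value"
  return DPP8;
}
// dpp8:[7,6,5,4,3,2,1,0] (reverse the lanes within each group) packs to
// 0x53977.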
9986AMDGPUAsmParser::isSupportedDPPCtrl(
StringRef Ctrl,
9988 if (Ctrl ==
"row_newbcast")
9991 if (Ctrl ==
"row_share" ||
9992 Ctrl ==
"row_xmask")
9995 if (Ctrl ==
"wave_shl" ||
9996 Ctrl ==
"wave_shr" ||
9997 Ctrl ==
"wave_rol" ||
9998 Ctrl ==
"wave_ror" ||
9999 Ctrl ==
"row_bcast")
10002 return Ctrl ==
"row_mirror" ||
10003 Ctrl ==
"row_half_mirror" ||
10004 Ctrl ==
"quad_perm" ||
10005 Ctrl ==
"row_shl" ||
10006 Ctrl ==
"row_shr" ||
10011AMDGPUAsmParser::parseDPPCtrlPerm() {
10014 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
10018 for (
int i = 0; i < 4; ++i) {
10023 SMLoc Loc = getLoc();
10024 if (getParser().parseAbsoluteExpression(Temp))
10026 if (Temp < 0 || Temp > 3) {
10027 Error(Loc,
"expected a 2-bit value");
10031 Val += (Temp << i * 2);
10034 if (!skipToken(
AsmToken::RBrac,
"expected a closing square bracket"))
10041AMDGPUAsmParser::parseDPPCtrlSel(
StringRef Ctrl) {
10042 using namespace AMDGPU::DPP;
10047 SMLoc Loc = getLoc();
10049 if (getParser().parseAbsoluteExpression(Val))
10052 struct DppCtrlCheck {
10059 .
Case(
"wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10060 .Case(
"wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10061 .Case(
"wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10062 .Case(
"wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10063 .Case(
"row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10064 .Case(
"row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10065 .Case(
"row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10066 .Case(
"row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10067 .Case(
"row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10068 .Case(
"row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10072 if (
Check.Ctrl == -1) {
10073 Valid = (
Ctrl ==
"row_bcast" && (Val == 15 || Val == 31));
10074 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10089 using namespace AMDGPU::DPP;
10092 !isSupportedDPPCtrl(getTokenStr(),
Operands))
10095 SMLoc S = getLoc();
10101 if (Ctrl ==
"row_mirror") {
10102 Val = DppCtrl::ROW_MIRROR;
10103 }
else if (Ctrl ==
"row_half_mirror") {
10104 Val = DppCtrl::ROW_HALF_MIRROR;
10107 if (Ctrl ==
"quad_perm") {
10108 Val = parseDPPCtrlPerm();
10110 Val = parseDPPCtrlSel(Ctrl);
10119 AMDGPUOperand::CreateImm(
this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10125 OptionalImmIndexMap OptionalIdx;
10132 int OldIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::old);
10134 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2_modifiers);
10135 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10139 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10140 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
10144 int VdstInIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
10145 bool IsVOP3CvtSrDpp =
Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10146 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10147 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10148 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
10150 for (
unsigned E =
Operands.size();
I != E; ++
I) {
10154 if (OldIdx == NumOperands) {
10156 constexpr int DST_IDX = 0;
10158 }
else if (Src2ModIdx == NumOperands) {
10168 if (IsVOP3CvtSrDpp) {
10177 if (TiedTo != -1) {
10182 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
10184 if (IsDPP8 &&
Op.isDppFI()) {
10187 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10188 }
else if (
Op.isReg()) {
10189 Op.addRegOperands(Inst, 1);
10190 }
else if (
Op.isImm() &&
10192 assert(!
Op.IsImmKindLiteral() &&
"Cannot use literal with DPP");
10193 Op.addImmOperands(Inst, 1);
10194 }
else if (
Op.isImm()) {
10195 OptionalIdx[
Op.getImmTy()] =
I;
10203 AMDGPUOperand::ImmTyClamp);
10209 AMDGPUOperand::ImmTyByteSel);
10216 cvtVOP3P(Inst,
Operands, OptionalIdx);
10218 cvtVOP3OpSel(Inst,
Operands, OptionalIdx);
10235 AMDGPUOperand::ImmTyDppFI);
10240 OptionalImmIndexMap OptionalIdx;
10244 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10245 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
10249 for (
unsigned E =
Operands.size();
I != E; ++
I) {
10252 if (TiedTo != -1) {
10257 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
10259 if (
Op.isReg() && validateVccOperand(
Op.getReg())) {
10267 Op.addImmOperands(Inst, 1);
10269 Op.addRegWithFPInputModsOperands(Inst, 2);
10270 }
else if (
Op.isDppFI()) {
10272 }
else if (
Op.isReg()) {
10273 Op.addRegOperands(Inst, 1);
10279 Op.addRegWithFPInputModsOperands(Inst, 2);
10280 }
else if (
Op.isReg()) {
10281 Op.addRegOperands(Inst, 1);
10282 }
else if (
Op.isDPPCtrl()) {
10283 Op.addImmOperands(Inst, 1);
10284 }
else if (
Op.isImm()) {
10286 OptionalIdx[
Op.getImmTy()] =
I;
10302 AMDGPUOperand::ImmTyDppFI);
10313 AMDGPUOperand::ImmTy
Type) {
10314 return parseStringOrIntWithPrefix(
10316 {
"BYTE_0",
"BYTE_1",
"BYTE_2",
"BYTE_3",
"WORD_0",
"WORD_1",
"DWORD"},
10321 return parseStringOrIntWithPrefix(
10322 Operands,
"dst_unused", {
"UNUSED_PAD",
"UNUSED_SEXT",
"UNUSED_PRESERVE"},
10323 AMDGPUOperand::ImmTySDWADstUnused);
10352 OptionalImmIndexMap OptionalIdx;
10353 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10354 bool SkippedVcc =
false;
10358 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10359 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
10362 for (
unsigned E =
Operands.size();
I != E; ++
I) {
10363 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
10364 if (SkipVcc && !SkippedVcc &&
Op.isReg() &&
10365 (
Op.getReg() == AMDGPU::VCC ||
Op.getReg() == AMDGPU::VCC_LO)) {
10383 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10384 }
else if (
Op.isImm()) {
10386 OptionalIdx[
Op.getImmTy()] =
I;
10390 SkippedVcc =
false;
10394 if (
Opc != AMDGPU::V_NOP_sdwa_gfx10 &&
Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10395 Opc != AMDGPU::V_NOP_sdwa_vi) {
10397 switch (BasicInstType) {
10401 AMDGPUOperand::ImmTyClamp, 0);
10405 AMDGPUOperand::ImmTyOModSI, 0);
10409 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10413 AMDGPUOperand::ImmTySDWADstUnused,
10414 DstUnused::UNUSED_PRESERVE);
10421 AMDGPUOperand::ImmTyClamp, 0);
10435 AMDGPUOperand::ImmTyClamp, 0);
10441 llvm_unreachable(
"Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10447 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10448 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10449 auto *it = Inst.
begin();
10451 it, AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::src2));
10463#define GET_REGISTER_MATCHER
10464#define GET_MATCHER_IMPLEMENTATION
10465#define GET_MNEMONIC_SPELL_CHECKER
10466#define GET_MNEMONIC_CHECKER
10467#include "AMDGPUGenAsmMatcher.inc"
10473 return parseTokenOp(
"addr64",
Operands);
10475 return parseTokenOp(
"done",
Operands);
10477 return parseTokenOp(
"idxen",
Operands);
10479 return parseTokenOp(
"lds",
Operands);
10481 return parseTokenOp(
"offen",
Operands);
10483 return parseTokenOp(
"off",
Operands);
10484 case MCK_row_95_en:
10485 return parseTokenOp(
"row_en",
Operands);
10487 return parseNamedBit(
"gds",
Operands, AMDGPUOperand::ImmTyGDS);
10489 return parseNamedBit(
"tfe",
Operands, AMDGPUOperand::ImmTyTFE);
10491 return tryCustomParseOperand(
Operands, MCK);
10502 AMDGPUOperand &Operand = (AMDGPUOperand&)
Op;
10505 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10507 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10509 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10511 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10513 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10515 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10523 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10525 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10526 case MCK_SOPPBrTarget:
10527 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10528 case MCK_VReg32OrOff:
10529 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10530 case MCK_InterpSlot:
10531 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10532 case MCK_InterpAttr:
10533 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10534 case MCK_InterpAttrChan:
10535 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10537 case MCK_SReg_64_XEXEC:
10547 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10549 return Match_InvalidOperand;
10558 SMLoc S = getLoc();
10566 if (!isUInt<16>(
Imm))
10567 return Error(S,
"expected a 16-bit value");
10570 AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTyEndpgm));
10574bool AMDGPUOperand::isEndpgm()
const {
return isImmTy(ImmTyEndpgm); }
10580bool AMDGPUOperand::isSplitBarrier()
const {
return isInlinableImm(MVT::i32); }
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
#define LLVM_EXTERNAL_VISIBILITY
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static unsigned getOperandSize(MachineInstr &MI, unsigned Idx, MachineRegisterInfo &MRI)
static llvm::Expected< InlineInfo > decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
mir Rename Register Operands
Register const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
Interface definition for SIInstrInfo.
unsigned unsigned DefaultVal
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
support::ulittle16_t & Lo
support::ulittle16_t & Hi
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
Target independent representation for an assembler token.
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
bool is(TokenKind K) const
TokenKind getKind() const
This class represents an Operation in the Expression.
Base class for user error types.
Lightweight error class with error context and mandatory checking.
Tagged union holding either a T or a Error.
Class representing an expression and its matching format.
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
This class is intended to be used as a base class for asm properties and features specific to the tar...
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
MCAsmParser & getParser()
Generic assembler parser interface, for use by target specific assembly parsers.
MCStreamer & getStreamer()
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
const MCRegisterInfo * getRegisterInfo() const
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
const MCSubtargetInfo * getSubtargetInfo() const
Base class for the full range of assembler expressions which are needed for parsing.
Instances of this class represent a single low-level machine instruction.
unsigned getNumOperands() const
unsigned getOpcode() const
iterator insert(iterator I, const MCOperand &Op)
void addOperand(const MCOperand Op)
const MCOperand & getOperand(unsigned i) const
Describe properties that are true of each instruction in the target description file.
Interface to description of machine instruction set.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Instances of this class represent operands of the MCInst class.
static MCOperand createExpr(const MCExpr *Val)
static MCOperand createReg(MCRegister Reg)
static MCOperand createImm(int64_t Val)
void setReg(MCRegister Reg)
Set the register number.
MCRegister getReg() const
Returns the register number.
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
virtual bool isReg() const =0
isReg - Is this a register operand?
virtual bool isMem() const =0
isMem - Is this a memory operand?
virtual MCRegister getReg() const =0
virtual bool isToken() const =0
isToken - Is this a token operand?
virtual bool isImm() const =0
isImm - Is this an immediate operand?
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
constexpr bool isValid() const
Streaming machine code generation interface.
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
MCTargetStreamer * getTargetStreamer()
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
FeatureBitset ToggleFeature(uint64_t FB)
Toggle a feature and return the re-computed feature bits.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created and uniqued by the MCContext.
LLVM_ABI void setVariableValue(const MCExpr *Value)
MCTargetAsmParser - Generic interface to target specific assembly parsers.
virtual bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands)=0
Parse one assembly instruction.
MCSubtargetInfo & copySTI()
Create a copy of STI and return a non-const reference to it.
virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
virtual bool ParseDirective(AsmToken DirectiveID)
ParseDirective - Parse a target specific assembler directive. This method is deprecated; use parseDirective instead.
virtual ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
tryParseRegister - parse one register if possible
virtual bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm)=0
Recognize a series of operands of a parsed instruction as an actual MCInst and emit it to the specified MCStreamer.
void setAvailableFeatures(const FeatureBitset &Value)
const MCSubtargetInfo & getSTI() const
virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind)
Allow a target to add special case operand matching for things that tblgen doesn't/can't handle effectively.
virtual unsigned checkTargetMatchPredicate(MCInst &Inst)
checkTargetMatchPredicate - Validate the instruction match against any complex target predicates not expressible via match classes.
Target specific streamer interface.
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
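The ParseStatus entries above encode the usual three-way contract of custom operand parsers. A minimal sketch of what each state conventionally means; the header path is an assumption and describe() is purely illustrative:

// Hedged sketch: NoMatch lets the matcher try another operand form, Failure
// means an error was already reported, Success means tokens were consumed.
#include "llvm/MC/MCParser/MCTargetAsmParser.h" // assumed home of ParseStatus
using namespace llvm;

static const char *describe(ParseStatus S) {
  if (S.isNoMatch())
    return "no match: the syntax is not ours, let another parser try";
  if (S.isFailure())
    return "failure: the syntax matched but an error was already reported";
  return "success: tokens were consumed and an operand was produced";
}

int main() { return describe(ParseStatus::Success)[0] == 's' ? 0 : 1; }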
Wrapper class representing virtual and physical registers.
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
constexpr const char * getPointer() const
constexpr bool isValid() const
Represents a range in source code.
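SMLoc and SMRange are how the parser remembers where each token and operand came from for diagnostics. A small self-contained sketch of the pointer-based locations (the instruction text is only an example):

#include "llvm/Support/SMLoc.h"
#include <cassert>
using namespace llvm;

int main() {
  const char *Buf = "s_nop 0";
  SMLoc MnemoLoc = SMLoc::getFromPointer(Buf);       // start of "s_nop"
  SMLoc OperandLoc = SMLoc::getFromPointer(Buf + 6); // start of "0"
  assert(MnemoLoc.isValid() && OperandLoc.isValid());

  SMRange Stmt(MnemoLoc, OperandLoc);                // span of interest
  assert(Stmt.Start.getPointer() == Buf);
  return 0;
}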
Implements a dense probed hash-table based set with some number of buckets stored inline.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs with the length computed at compile time.
StringMapEntry - This is used to represent one value that is inserted into a StringMap.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
size - Get the string size.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
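The StringRef helpers above are the workhorses of mnemonic and suffix handling. A hedged, self-contained sketch; the strings are made up and do not reflect the parser's real suffix logic:

#include "llvm/ADT/StringRef.h"
#include <cassert>
using namespace llvm;

int main() {
  StringRef Mnemo = "v_add_f32_e64";
  // consume_back() both tests for and strips the suffix in one step.
  bool HasE64 = Mnemo.consume_back("_e64");
  assert(HasE64 && Mnemo == "v_add_f32");

  StringRef Reg = "v[0:3]";
  assert(Reg.starts_with("v") && Reg.ends_with("]"));
  if (Reg.consume_front("v"))
    Reg = Reg.drop_front();            // drop the '['
  assert(!Reg.empty() && Reg.size() == 4 && Reg.data()[0] == '0');
  return 0;
}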
StringSet - A wrapper for StringMap that provides set-like functionality.
bool contains(StringRef key) const
Check if the set contains the given key.
std::pair< typename Base::iterator, bool > insert(StringRef key)
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
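StringSwitch gives a compact way to map keyword spellings to values, which is how many symbolic operand names get resolved. A hedged sketch with a hypothetical modifier enum; .Default() is the standard fallback case even though only .Case() appears in the list above:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

enum class Mod { None, Abs, Neg, Sext }; // hypothetical, for illustration only

static Mod parseModName(StringRef Name) {
  return StringSwitch<Mod>(Name)
      .Case("abs", Mod::Abs)
      .Case("neg", Mod::Neg)
      .Case("sext", Mod::Sext)
      .Default(Mod::None);
}

int main() { return parseModName("neg") == Mod::Neg ? 0 : 1; }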
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
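Taken together, the sendmsg helpers above follow a lookup-validate-encode sequence. A hedged sketch of that flow using only the signatures listed here; the header path and enclosing namespaces are assumptions, and the real parser additionally emits precise diagnostics:

#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h" // assumed to declare the SendMsg helpers
#include <cstdint>
using namespace llvm;
using namespace llvm::AMDGPU::SendMsg;

static bool encodeSymbolicSendMsg(StringRef Msg, StringRef Op, int64_t StreamId,
                                  const MCSubtargetInfo &STI,
                                  uint64_t &Encoded) {
  int64_t MsgId = getMsgId(Msg, STI);
  if (!isValidMsgId(MsgId, STI))
    return false;                        // unknown message name
  int64_t OpId = 0;
  if (msgRequiresOp(MsgId, STI)) {
    OpId = getMsgOpId(MsgId, Op, STI);
    if (!isValidMsgOp(MsgId, OpId, STI, /*Strict=*/true))
      return false;                      // operation not valid for this message
  }
  if (StreamId && !msgSupportsStream(MsgId, OpId, STI))
    return false;                        // stream id not accepted here
  Encoded = encodeMsg(MsgId, OpId, StreamId); // pack fields into the immediate
  return true;
}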
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Returns true if Reg is a scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants, literals and mandatory literals (KImm).
bool hasMAIInsts(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ UNDEF
UNDEF - An undefined node.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Reg
All possible values of the reg field in the ModR/M byte.
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
void PrintError(const Twine &Msg)
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Target & getTheR600Target()
The target for R600 GPUs.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
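Several of the integer helpers above (Hi_32/Lo_32, isUInt/isUIntN, isPowerOf2_64, divideCeil) come from MathExtras.h and are the kind of utilities used when splitting and range-checking literals. A small self-contained sketch with illustrative values:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
using namespace llvm;

int main() {
  uint64_t Lit = 0x3FF0000000000000ULL;          // bit pattern of double 1.0
  assert(Hi_32(Lit) == 0x3FF00000u && Lo_32(Lit) == 0u);
  assert(isUInt<32>(UINT32_MAX));                // fits a 32-bit field
  assert(isUIntN(20, 0xABCDE));                  // dynamic-width variant
  assert(isPowerOf2_64(64));
  assert(divideCeil(65, 64) == 2);               // round-up division
  return 0;
}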
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
const char * toString(DWARFSectionKind Kind)
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
const MCExpr * compute_pgm_rsrc1
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * group_segment_fixed_size
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
const MCExpr * kernel_code_properties
Represents the counter values to wait for in an s_waitcnt instruction.
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
static LLVM_ABI const fltSemantics & IEEEhalf() LLVM_READNONE
static LLVM_ABI const fltSemantics & BFloat() LLVM_READNONE
opStatus
IEEE-754R 7: Default exception handling.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
Direction
An enum for the direction of the loop.
RegisterMCAsmParser - Helper template for registering a target specific assembly parser, for use in the target machine initialization function.
uint32_t group_segment_fixed_size
uint32_t private_segment_fixed_size