enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
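// AMDGPUOperand represents a single parsed operand: a token, an immediate,
// a register, or an expression. It keeps its source range and a back-pointer
// to the parser so the predicate helpers below can query subtarget features.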
SMLoc StartLoc, EndLoc;
const AMDGPUAsmParser *AsmParser;

AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : Kind(Kind_), AsmParser(AsmParser_) {}

using Ptr = std::unique_ptr<AMDGPUOperand>;
bool hasFPModifiers() const { return Abs || Neg; }
bool hasIntModifiers() const { return Sext; }
bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

int64_t getFPModifiersOperand() const {

int64_t getIntModifiersOperand() const {

int64_t getModifiersOperand() const {
  assert(!(hasFPModifiers() && hasIntModifiers()) &&
         "fp and int modifiers should not be used simultaneously");
  if (hasFPModifiers())
    return getFPModifiersOperand();
  if (hasIntModifiers())
    return getIntModifiersOperand();
  return 0;
}

friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
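// The Abs/Neg/Sext flags are folded into a single src-modifiers immediate on
// the MCInst. A minimal sketch of the intended use (names follow this file):
//
//   AMDGPUOperand::Modifiers Mods = Op.getModifiers();
//   Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
//
// FP modifiers (abs/neg) and the integer sext modifier are mutually
// exclusive, which the assert in getModifiersOperand() enforces.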
ImmTyMatrixAScaleFmt,
ImmTyMatrixBScaleFmt,

ImmKindTyMandatoryLiteral,

mutable ImmKindTy Kind;
bool isToken() const override { return Kind == Token; }

bool isSymbolRefExpr() const {

bool isImm() const override { return Kind == Immediate; }

void setImmKindNone() const { Imm.Kind = ImmKindTyNone; }
void setImmKindLiteral() const { Imm.Kind = ImmKindTyLiteral; }
void setImmKindMandatoryLiteral() const { Imm.Kind = ImmKindTyMandatoryLiteral; }
void setImmKindConst() const { Imm.Kind = ImmKindTyConst; }

bool IsImmKindLiteral() const { return isImm() && Imm.Kind == ImmKindTyLiteral; }
bool IsImmKindMandatoryLiteral() const { return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral; }
bool isImmKindConst() const { return isImm() && Imm.Kind == ImmKindTyConst; }

bool isInlinableImm(MVT type) const;
bool isLiteralImm(MVT type) const;

bool isRegKind() const { return Kind == Register; }

bool isReg() const override { return isRegKind() && !hasModifiers(); }

bool isRegOrInline(unsigned RCID, MVT type) const {
  return isRegClass(RCID) || isInlinableImm(type);
}

  return isRegOrInline(RCID, type) || isLiteralImm(type);
bool isRegOrImmWithInt16InputMods() const {

template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
  return isRegOrImmWithInputMods(
      IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
}

bool isRegOrImmWithInt32InputMods() const {

bool isRegOrInlineImmWithInt16InputMods() const {
  return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
}

template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
  return isRegOrInline(
      IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
}

bool isRegOrInlineImmWithInt32InputMods() const {
  return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
}

bool isRegOrImmWithInt64InputMods() const {

bool isRegOrImmWithFP16InputMods() const {

template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
  return isRegOrImmWithInputMods(
      IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
}

bool isRegOrImmWithFP32InputMods() const {

bool isRegOrImmWithFP64InputMods() const {

template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
  return isRegOrInline(
      IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
}

bool isRegOrInlineImmWithFP32InputMods() const {
  return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
}

bool isRegOrInlineImmWithFP64InputMods() const {
  return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
}

bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
bool isVRegWithFP32InputMods() const {
  return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
}

bool isVRegWithFP64InputMods() const {
  return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
}

bool isPackedFP16InputMods() const {

bool isPackedVGPRFP32InputMods() const {

bool isVReg() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         isRegClass(AMDGPU::VReg_64RegClassID) ||
         isRegClass(AMDGPU::VReg_96RegClassID) ||
         isRegClass(AMDGPU::VReg_128RegClassID) ||
         isRegClass(AMDGPU::VReg_160RegClassID) ||
         isRegClass(AMDGPU::VReg_192RegClassID) ||
         isRegClass(AMDGPU::VReg_256RegClassID) ||
         isRegClass(AMDGPU::VReg_512RegClassID) ||
         isRegClass(AMDGPU::VReg_1024RegClassID);
}

bool isVReg32() const { return isRegClass(AMDGPU::VGPR_32RegClassID); }

bool isVReg32OrOff() const { return isOff() || isVReg32(); }

  return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
bool isVRegWithInputMods() const;
template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
template <bool IsFake16> bool isT16VRegWithInputMods() const;

bool isSDWAOperand(MVT type) const;
bool isSDWAFP16Operand() const;
bool isSDWAFP32Operand() const;
bool isSDWAInt16Operand() const;
bool isSDWAInt32Operand() const;

bool isImmTy(ImmTy ImmT) const { return isImm() && Imm.Type == ImmT; }

template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

bool isImmLiteral() const { return isImmTy(ImmTyNone); }

bool isImmModifier() const { return isImm() && Imm.Type != ImmTyNone; }
bool isOModSI() const { return isImmTy(ImmTyOModSI); }
bool isDim() const { return isImmTy(ImmTyDim); }
bool isR128A16() const { return isImmTy(ImmTyR128A16); }
bool isOff() const { return isImmTy(ImmTyOff); }
bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
bool isOffen() const { return isImmTy(ImmTyOffen); }
bool isIdxen() const { return isImmTy(ImmTyIdxen); }
bool isAddr64() const { return isImmTy(ImmTyAddr64); }
bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
bool isGDS() const { return isImmTy(ImmTyGDS); }
bool isLDS() const { return isImmTy(ImmTyLDS); }
bool isCPol() const { return isImmTy(ImmTyCPol); }
bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
bool isDppFI() const { return isImmTy(ImmTyDppFI); }
bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
bool isOpSel() const { return isImmTy(ImmTyOpSel); }
bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
bool isNegLo() const { return isImmTy(ImmTyNegLo); }
bool isNegHi() const { return isImmTy(ImmTyNegHi); }
bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
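// These one-line ImmTy predicates are what the TableGen-generated matcher
// (AMDGPUGenAsmMatcher.inc) calls to classify custom immediate operands such
// as offsets, cache-policy bits and the DPP/SDWA controls.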
bool isRegOrImm() const { return isReg() || isImm(); }

bool isRegClass(unsigned RCID) const;

bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
  return isRegOrInline(RCID, type) && !hasModifiers();
}

bool isSCSrcB16() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
}

bool isSCSrcV2B16() const {

bool isSCSrc_b32() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
}

bool isSCSrc_b64() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
}

bool isBoolReg() const;

bool isSCSrcF16() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
}

bool isSCSrcV2F16() const {

bool isSCSrcF32() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
}

bool isSCSrcF64() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
}

bool isSSrc_b32() const {
  return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
}

bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

bool isSSrcV2B16() const {

bool isSSrc_b64() const {
  return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
         (((const MCTargetAsmParser *)AsmParser)
              ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&

bool isSSrc_f32() const {
  return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
}

bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

bool isSSrcV2F16() const {

bool isSSrcV2FP32() const {

bool isSCSrcV2FP32() const {

bool isSSrcV2INT32() const {

bool isSCSrcV2INT32() const { return isSCSrc_b32(); }

bool isSSrcOrLds_b32() const {
  return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
         isLiteralImm(MVT::i32) || isExpr();
}
bool isVCSrc_b32() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
}

bool isVCSrc_b32_Lo256() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
}

bool isVCSrc_b64_Lo256() const {
  return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
}

bool isVCSrc_b64() const {
  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
}

bool isVCSrcT_b16() const {
  return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
}

bool isVCSrcTB16_Lo128() const {
  return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
}

bool isVCSrcFake16B16_Lo128() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
}

bool isVCSrc_b16() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
}

bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

bool isVCSrc_f32() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
}

bool isVCSrc_f64() const {
  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
}

bool isVCSrcTBF16() const {
  return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
}

bool isVCSrcT_f16() const {
  return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
}

bool isVCSrcT_bf16() const {
  return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
}

bool isVCSrcTBF16_Lo128() const {
  return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
}

bool isVCSrcTF16_Lo128() const {
  return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
}

bool isVCSrcFake16BF16_Lo128() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
}

bool isVCSrcFake16F16_Lo128() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
}

bool isVCSrc_bf16() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
}

bool isVCSrc_f16() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
}

bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

bool isVCSrc_v2f16() const { return isVCSrc_f16(); }

bool isVSrc_b32() const {
  return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
}

bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }

bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }

bool isVSrcT_b16_Lo128() const {
  return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
}

bool isVSrcFake16_b16_Lo128() const {
  return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
}

bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

bool isVCSrcV2FP32() const { return isVCSrc_f64(); }

bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

bool isVCSrc_v2b32() const { return isVCSrc_b64(); }

bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

bool isVSrc_f32() const {
  return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
}

bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }

bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }

bool isVSrcT_bf16_Lo128() const {
  return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
}

bool isVSrcT_f16_Lo128() const {
  return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
}

bool isVSrcFake16_bf16_Lo128() const {
  return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
}

bool isVSrcFake16_f16_Lo128() const {
  return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
}

bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

bool isVSrc_v2bf16() const { return isVSrc_bf16() || isLiteralImm(MVT::v2bf16); }

bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }

bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
bool isVISrcB32() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); }

bool isVISrcB16() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); }

bool isVISrcV2B16() const {

bool isVISrcF32() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); }

bool isVISrcF16() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); }

bool isVISrcV2F16() const { return isVISrcF16() || isVISrcB32(); }

bool isVISrc_64_bf16() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16); }

bool isVISrc_64_f16() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16); }

bool isVISrc_64_b32() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); }

bool isVISrc_64B64() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); }

bool isVISrc_64_f64() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); }

bool isVISrc_64V2FP32() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); }

bool isVISrc_64V2INT32() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); }

bool isVISrc_256_b32() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); }

bool isVISrc_256_f32() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); }

bool isVISrc_256B64() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); }

bool isVISrc_256_f64() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); }

bool isVISrc_512_f64() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64); }

bool isVISrc_128B16() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); }

bool isVISrc_128V2B16() const { return isVISrc_128B16(); }

bool isVISrc_128_b32() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); }

bool isVISrc_128_f32() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); }

bool isVISrc_256V2FP32() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); }

bool isVISrc_256V2INT32() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); }

bool isVISrc_512_b32() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); }

bool isVISrc_512B16() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); }

bool isVISrc_512V2B16() const { return isVISrc_512B16(); }

bool isVISrc_512_f32() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); }

bool isVISrc_512F16() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); }

bool isVISrc_512V2F16() const { return isVISrc_512F16() || isVISrc_512_b32(); }

bool isVISrc_1024_b32() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); }

bool isVISrc_1024B16() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); }

bool isVISrc_1024V2B16() const { return isVISrc_1024B16(); }

bool isVISrc_1024_f32() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); }

bool isVISrc_1024F16() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); }

bool isVISrc_1024V2F16() const { return isVISrc_1024F16() || isVISrc_1024_b32(); }

bool isAISrcB32() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); }

bool isAISrcB16() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); }

bool isAISrcV2B16() const {

bool isAISrcF32() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); }

bool isAISrcF16() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); }

bool isAISrcV2F16() const { return isAISrcF16() || isAISrcB32(); }

bool isAISrc_64B64() const { return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); }

bool isAISrc_64_f64() const { return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); }

bool isAISrc_128_b32() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); }

bool isAISrc_128B16() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); }

bool isAISrc_128V2B16() const { return isAISrc_128B16(); }

bool isAISrc_128_f32() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); }

bool isAISrc_128F16() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); }

bool isAISrc_128V2F16() const { return isAISrc_128F16() || isAISrc_128_b32(); }

bool isVISrc_128_bf16() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16); }

bool isVISrc_128_f16() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); }

bool isVISrc_128V2F16() const { return isVISrc_128_f16() || isVISrc_128_b32(); }

bool isAISrc_256B64() const { return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); }

bool isAISrc_256_f64() const { return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); }

bool isAISrc_512_b32() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); }

bool isAISrc_512B16() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); }

bool isAISrc_512V2B16() const { return isAISrc_512B16(); }

bool isAISrc_512_f32() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); }

bool isAISrc_512F16() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); }

bool isAISrc_512V2F16() const { return isAISrc_512F16() || isAISrc_512_b32(); }

bool isAISrc_1024_b32() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); }

bool isAISrc_1024B16() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); }

bool isAISrc_1024V2B16() const { return isAISrc_1024B16(); }

bool isAISrc_1024_f32() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); }

bool isAISrc_1024F16() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); }

bool isAISrc_1024V2F16() const { return isAISrc_1024F16() || isAISrc_1024_b32(); }

bool isKImmFP32() const { return isLiteralImm(MVT::f32); }

bool isKImmFP16() const { return isLiteralImm(MVT::f16); }

bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
bool isMem() const override {

bool isExpr() const { return Kind == Expression; }

bool isSOPPBrTarget() const { return isExpr() || isImm(); }

bool isSWaitCnt() const;
bool isDepCtr() const;
bool isSDelayALU() const;
bool isHwreg() const;
bool isSendMsg() const;
bool isSplitBarrier() const;
bool isSwizzle() const;
bool isSMRDOffset8() const;
bool isSMEMOffset() const;
bool isSMRDLiteralOffset() const;
bool isDPP8() const;
bool isDPPCtrl() const;
bool isBLGP() const;
bool isGPRIdxMode() const;
bool isS16Imm() const;
bool isU16Imm() const;
bool isEndpgm() const;
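// Context-sensitive operands (waitcnts, hwreg/sendmsg encodings, swizzles,
// DPP controls, ...) need parser and subtarget state, so their checks are
// only declared here and defined out-of-line further down in this file.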
auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
  return [this, P]() { return P(*this); };
}

  return StringRef(Tok.Data, Tok.Length);
void setImm(int64_t Val) {

ImmTy getImmTy() const {

MCRegister getReg() const override {

SMLoc getStartLoc() const override {

SMLoc getEndLoc() const override {

SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }

Modifiers getModifiers() const {
  assert(isRegKind() || isImmTy(ImmTyNone));
  return isRegKind() ? Reg.Mods : Imm.Mods;
}

void setModifiers(Modifiers Mods) {
  assert(isRegKind() || isImmTy(ImmTyNone));

bool hasModifiers() const { return getModifiers().hasModifiers(); }

bool hasFPModifiers() const { return getModifiers().hasFPModifiers(); }

bool hasIntModifiers() const { return getModifiers().hasIntModifiers(); }

uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

void addRegOperands(MCInst &Inst, unsigned N) const;

void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
  if (isRegKind())
    addRegOperands(Inst, N);
  else
    addImmOperands(Inst, N);
}

void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
  Modifiers Mods = getModifiers();
    addRegOperands(Inst, N);
    addImmOperands(Inst, N, false);

void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
  assert(!hasIntModifiers());
  addRegOrImmWithInputModsOperands(Inst, N);
}

void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
  assert(!hasFPModifiers());
  addRegOrImmWithInputModsOperands(Inst, N);
}

void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
  Modifiers Mods = getModifiers();
  addRegOperands(Inst, N);

void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
  assert(!hasIntModifiers());
  addRegWithInputModsOperands(Inst, N);
}

void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
  assert(!hasFPModifiers());
  addRegWithInputModsOperands(Inst, N);
}
static void printImmTy(raw_ostream &OS, ImmTy Type) {
  switch (Type) {
  case ImmTyNone: OS << "None"; break;
  case ImmTyGDS: OS << "GDS"; break;
  case ImmTyLDS: OS << "LDS"; break;
  case ImmTyOffen: OS << "Offen"; break;
  case ImmTyIdxen: OS << "Idxen"; break;
  case ImmTyAddr64: OS << "Addr64"; break;
  case ImmTyOffset: OS << "Offset"; break;
  case ImmTyInstOffset: OS << "InstOffset"; break;
  case ImmTyOffset0: OS << "Offset0"; break;
  case ImmTyOffset1: OS << "Offset1"; break;
  case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
  case ImmTyCPol: OS << "CPol"; break;
  case ImmTyIndexKey8bit: OS << "index_key"; break;
  case ImmTyIndexKey16bit: OS << "index_key"; break;
  case ImmTyIndexKey32bit: OS << "index_key"; break;
  case ImmTyTFE: OS << "TFE"; break;
  case ImmTyD16: OS << "D16"; break;
  case ImmTyFORMAT: OS << "FORMAT"; break;
  case ImmTyClamp: OS << "Clamp"; break;
  case ImmTyOModSI: OS << "OModSI"; break;
  case ImmTyDPP8: OS << "DPP8"; break;
  case ImmTyDppCtrl: OS << "DppCtrl"; break;
  case ImmTyDppRowMask: OS << "DppRowMask"; break;
  case ImmTyDppBankMask: OS << "DppBankMask"; break;
  case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
  case ImmTyDppFI: OS << "DppFI"; break;
  case ImmTySDWADstSel: OS << "SDWADstSel"; break;
  case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
  case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
  case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
  case ImmTyDMask: OS << "DMask"; break;
  case ImmTyDim: OS << "Dim"; break;
  case ImmTyUNorm: OS << "UNorm"; break;
  case ImmTyDA: OS << "DA"; break;
  case ImmTyR128A16: OS << "R128A16"; break;
  case ImmTyA16: OS << "A16"; break;
  case ImmTyLWE: OS << "LWE"; break;
  case ImmTyOff: OS << "Off"; break;
  case ImmTyExpTgt: OS << "ExpTgt"; break;
  case ImmTyExpCompr: OS << "ExpCompr"; break;
  case ImmTyExpVM: OS << "ExpVM"; break;
  case ImmTyHwreg: OS << "Hwreg"; break;
  case ImmTySendMsg: OS << "SendMsg"; break;
  case ImmTyInterpSlot: OS << "InterpSlot"; break;
  case ImmTyInterpAttr: OS << "InterpAttr"; break;
  case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
  case ImmTyOpSel: OS << "OpSel"; break;
  case ImmTyOpSelHi: OS << "OpSelHi"; break;
  case ImmTyNegLo: OS << "NegLo"; break;
  case ImmTyNegHi: OS << "NegHi"; break;
  case ImmTySwizzle: OS << "Swizzle"; break;
  case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
  case ImmTyHigh: OS << "High"; break;
  case ImmTyBLGP: OS << "BLGP"; break;
  case ImmTyCBSZ: OS << "CBSZ"; break;
  case ImmTyABID: OS << "ABID"; break;
  case ImmTyEndpgm: OS << "Endpgm"; break;
  case ImmTyWaitVDST: OS << "WaitVDST"; break;
  case ImmTyWaitEXP: OS << "WaitEXP"; break;
  case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
  case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
  case ImmTyBitOp3: OS << "BitOp3"; break;
  case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
  case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
  case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
  case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
  case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
  case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
  case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
  case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
  case ImmTyScaleSel: OS << "ScaleSel"; break;
  case ImmTyByteSel: OS << "ByteSel"; break;
  }
}
void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
     << " mods: " << Reg.Mods << '>';
  if (getImmTy() != ImmTyNone) {
    OS << " type: "; printImmTy(OS, getImmTy());
  OS << " mods: " << Imm.Mods << '>';

static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                    int64_t Val, SMLoc Loc,
                                    ImmTy Type = ImmTyNone,
                                    bool IsFPImm = false) {
  auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
  Op->Imm.IsFPImm = IsFPImm;
  Op->Imm.Kind = ImmKindTyNone;
  Op->Imm.Mods = Modifiers();

static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                      StringRef Str, SMLoc Loc,
                                      bool HasExplicitEncodingSize = true) {
  auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
  Res->Tok.Data = Str.data();
  Res->Tok.Length = Str.size();
  Res->StartLoc = Loc;

static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                    MCRegister Reg, SMLoc S, SMLoc E) {
  auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
  Op->Reg.RegNo = Reg;
  Op->Reg.Mods = Modifiers();

static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                     const class MCExpr *Expr, SMLoc S) {
  auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);

OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
                               VgprIndexUnusedMin);

  void usesAgprAt(int i) {
    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
          Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
                               VgprIndexUnusedMin);

  KernelScopeInfo() = default;

    MSTI = Ctx->getSubtargetInfo();
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
      usesAgprAt(AgprIndexUnusedMin = -1);

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
MCAsmParser &Parser;

unsigned ForcedEncodingSize = 0;
bool ForcedDPP = false;
bool ForcedSDWA = false;
KernelScopeInfo KernelScope;

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

void createConstantSymbol(StringRef Id, int64_t Val);

bool ParseAsAbsoluteExpression(uint32_t &Ret);

                    const MCExpr *FlatScrUsed, bool XNACKUsed,
                    std::optional<bool> EnableWavefrontSize32,

bool ParseDirectiveAMDGCNTarget();
bool ParseDirectiveAMDHSACodeObjectVersion();
bool ParseDirectiveAMDHSAKernel();
bool ParseDirectiveAMDKernelCodeT();
bool ParseDirectiveAMDGPUHsaKernel();
bool ParseDirectiveISAVersion();
bool ParseDirectiveHSAMetadata();
bool ParseDirectivePALMetadataBegin();
bool ParseDirectivePALMetadata();
bool ParseDirectiveAMDGPULDS();

bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                         const char *AssemblerDirectiveEnd,
                         std::string &CollectString);

bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,

bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                         unsigned &RegNum, unsigned &RegWidth,
                         bool RestoreOnFailure = false);
bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                         unsigned &RegNum, unsigned &RegWidth,

MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,

MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,

MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,

bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);

MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,

std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
void initializeGprCountSymbol(RegisterKind RegKind);
bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,

  OperandMode_Default,

using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                const MCInstrInfo &MII,
                const MCTargetOptions &Options)
    : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
  if (getFeatureBits().none()) {
    copySTI().ToggleFeature("southern-islands");

  FeatureBitset FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureWavefrontSize64] &&
      !FB[AMDGPU::FeatureWavefrontSize32]) {
    copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);

  setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
    createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
    createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);

    createConstantSymbol(".option.machine_version_major", ISA.Major);
    createConstantSymbol(".option.machine_version_minor", ISA.Minor);
    createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);

    initializeGprCountSymbol(IS_VGPR);
    initializeGprCountSymbol(IS_SGPR);

    createConstantSymbol(Symbol, Code);

  createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
  createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
  createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
bool hasInv2PiInlineImm() const {
  return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
}

bool has64BitLiterals() const {
  return getFeatureBits()[AMDGPU::Feature64BitLiterals];
}

bool hasFlatOffsets() const {
  return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
}

bool hasTrue16Insts() const {
  return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
}

  return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];

bool hasSGPR102_SGPR103() const {

bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

bool hasIntClamp() const {
  return getFeatureBits()[AMDGPU::FeatureIntClamp];
}

bool hasPartialNSAEncoding() const {
  return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
}

bool hasGloballyAddressableScratch() const {
  return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
}

AMDGPUTargetStreamer &getTargetStreamer() {
  MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
  return static_cast<AMDGPUTargetStreamer &>(TS);
}

const MCRegisterInfo *getMRI() const {
  return const_cast<AMDGPUAsmParser *>(this)->getContext().getRegisterInfo();
}

const MCInstrInfo *getMII() const {

const FeatureBitset &getFeatureBits() const {
  return getSTI().getFeatureBits();
}

void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
bool isForcedDPP() const { return ForcedDPP; }
bool isForcedSDWA() const { return ForcedSDWA; }

ArrayRef<unsigned> getMatchedVariants() const;
StringRef getMatchedVariantName() const;

std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                   bool RestoreOnFailure);
bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                             SMLoc &EndLoc) override;
unsigned checkTargetMatchPredicate(MCInst &Inst) override;
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                    unsigned Kind) override;
bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                             uint64_t &ErrorInfo,
                             bool MatchingInlineAsm) override;
bool ParseDirective(AsmToken DirectiveID) override;
                  OperandMode Mode = OperandMode_Default);
StringRef parseMnemonicSuffix(StringRef Name);
bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
                    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                    std::function<bool(int64_t &)> ConvertResult = nullptr);

ParseStatus parseOperandArrayWithPrefix(
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    bool (*ConvertResult)(int64_t &) = nullptr);

                    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;

ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,

                    ArrayRef<const char *> Ids,
                    ArrayRef<const char *> Ids,
                    AMDGPUOperand::ImmTy Type);

bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
bool parseSP3NegModifier();
                    bool HasLit = false, bool HasLit64 = false);
                    bool HasLit = false, bool HasLit64 = false);
                    bool AllowImm = true);
                    bool AllowImm = true);
                    AMDGPUOperand::ImmTy ImmTy);
                    AMDGPUOperand::ImmTy Type);
                    AMDGPUOperand::ImmTy Type);
                    AMDGPUOperand::ImmTy Type);

ParseStatus parseDfmtNfmt(int64_t &Format);
ParseStatus parseUfmt(int64_t &Format);
ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
ParseStatus parseNumericFormat(int64_t &Format);

bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

bool parseCnt(int64_t &IntVal);

bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);

bool parseDelay(int64_t &Delay);

struct OperandInfoTy {
  bool IsSymbolic = false;
  bool IsDefined = false;

  OperandInfoTy(int64_t Val) : Val(Val) {}

struct StructuredOpField : OperandInfoTy {
  bool IsDefined = false;

  StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
      : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
  virtual ~StructuredOpField() = default;

  bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
    Parser.Error(Loc, "invalid " + Desc + ": " + Err);

  virtual bool validate(AMDGPUAsmParser &Parser) const {
      return Error(Parser, "not supported on this GPU");
      return Error(Parser, "only " + Twine(Width) + "-bit values are legal");

bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
bool validateSendMsg(const OperandInfoTy &Msg,
                     const OperandInfoTy &Op,
                     const OperandInfoTy &Stream);

ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
                           OperandInfoTy &Width);

SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,

                    bool SearchMandatoryLiterals = false) const;

bool validateSOPLiteral(const MCInst &Inst) const;
std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
bool tryVOPD(const MCInst &Inst);
bool tryVOPD3(const MCInst &Inst);
bool tryAnotherVOPDEncoding(const MCInst &Inst);

bool validateIntClampSupported(const MCInst &Inst);
bool validateMIMGAtomicDMask(const MCInst &Inst);
bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
bool validateMIMGD16(const MCInst &Inst);
bool validateTensorR128(const MCInst &Inst);
bool validateMIMGMSAA(const MCInst &Inst);
bool validateOpSel(const MCInst &Inst);
bool validateTrue16OpSel(const MCInst &Inst);
bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
bool validateVccOperand(MCRegister Reg) const;
bool validateAGPRLdSt(const MCInst &Inst) const;
bool validateVGPRAlign(const MCInst &Inst) const;
bool validateDivScale(const MCInst &Inst);
                    const SMLoc &IDLoc);
                    const unsigned CPol);
std::optional<StringRef> validateLdsDirect(const MCInst &Inst);

unsigned getConstantBusLimit(unsigned Opcode) const;
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

bool isSupportedMnemo(StringRef Mnemo,
                      const FeatureBitset &FBS);
bool isSupportedMnemo(StringRef Mnemo,
                      const FeatureBitset &FBS,
                      ArrayRef<unsigned> Variants);
bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

bool isId(const StringRef Id) const;
bool isId(const AsmToken &Token, const StringRef Id) const;
StringRef getId() const;
bool trySkipId(const StringRef Id);
bool trySkipId(const StringRef Pref, const StringRef Id);

bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
bool parseId(StringRef &Val, const StringRef ErrMsg = "");

StringRef getTokenStr() const;
AsmToken peekToken(bool ShouldSkipSpace = true);
SMLoc getLoc() const;

void onBeginOfFile() override;
bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;

bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
                         const unsigned MaxVal, const Twine &ErrMsg,
bool parseSwizzleOperands(const unsigned OpNum, int64_t *Op,
                          const unsigned MinVal,
                          const unsigned MaxVal,
                          const StringRef ErrMsg);

bool parseSwizzleOffset(int64_t &Imm);
bool parseSwizzleMacro(int64_t &Imm);
bool parseSwizzleQuadPerm(int64_t &Imm);
bool parseSwizzleBitmaskPerm(int64_t &Imm);
bool parseSwizzleBroadcast(int64_t &Imm);
bool parseSwizzleSwap(int64_t &Imm);
bool parseSwizzleReverse(int64_t &Imm);
bool parseSwizzleFFT(int64_t &Imm);
bool parseSwizzleRotate(int64_t &Imm);

int64_t parseGPRIdxMacro();

                    OptionalImmIndexMap &OptionalIdx);
                    OptionalImmIndexMap &OptionalIdx);
                    OptionalImmIndexMap &OptionalIdx);

bool parseDimId(unsigned &Encoding);

bool convertDppBoundCtrl(int64_t &BoundCtrl);
int64_t parseDPPCtrlSel(StringRef Ctrl);
int64_t parseDPPCtrlPerm();
                    bool IsDPP8 = false);
                    AMDGPUOperand::ImmTy Type);
                    uint64_t BasicInstType,
                    bool SkipDstVcc = false,
                    bool SkipSrcVcc = false);
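// The definitions below implement the AMDGPUOperand predicates declared
// above, starting with the inline-constant and literal-immediate checks used
// to decide how a parsed immediate can be encoded.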
bool AMDGPUOperand::isInlinableImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {
    if (type == MVT::f64 || type == MVT::i64) {
                                     AsmParser->hasInv2PiInlineImm());

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
                      APFloat::rmNearestTiesToEven, &Lost);

      uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
                                   AsmParser->hasInv2PiInlineImm());

        static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());

  if (type == MVT::f64 || type == MVT::i64) {
                                   AsmParser->hasInv2PiInlineImm());

      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());

      static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
      AsmParser->hasInv2PiInlineImm());

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {
      (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();

  if (type == MVT::f64 && hasFPModifiers()) {

  if (type == MVT::f64) {

  if (type == MVT::i64) {

  MVT ExpectedType = (type == MVT::v2f16)   ? MVT::f16
                     : (type == MVT::v2i16) ? MVT::f32
                     : (type == MVT::v2f32) ? MVT::f32

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);

template <bool IsFake16>
bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
  return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
                             : AMDGPU::VGPR_16_Lo128RegClassID);

template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
  return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
                             : AMDGPU::VGPR_16RegClassID);

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
  if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);

bool AMDGPUOperand::isSDWAFP16Operand() const { return isSDWAOperand(MVT::f16); }

bool AMDGPUOperand::isSDWAFP32Operand() const { return isSDWAOperand(MVT::f32); }

bool AMDGPUOperand::isSDWAInt16Operand() const { return isSDWAOperand(MVT::i16); }

bool AMDGPUOperand::isSDWAInt32Operand() const { return isSDWAOperand(MVT::i32); }

bool AMDGPUOperand::isBoolReg() const {
  auto FB = AsmParser->getFeatureBits();
  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
}
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const {
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N,
                                   bool ApplyModifiers) const {
    addLiteralImmOperand(Inst, Imm.Val,
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
    assert(!isImmTy(ImmTyNone) || !hasModifiers());

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
                                         bool ApplyModifiers) const {
  const auto &InstDesc = AsmParser->getMII()->get(Inst.getOpcode());

  if (ApplyModifiers) {
    Val = applyInputFPModifiers(Val, Size);

  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
                             AsmParser->hasInv2PiInlineImm())) {

    bool HasMandatoryLiteral =
    if (Literal.getLoBits(32) != 0 &&
        (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
        !HasMandatoryLiteral) {
      const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
      Val &= 0xffffffff00000000u;
    setImmKindLiteral();
    setImmKindMandatoryLiteral();

    if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
    setImmKindLiteral();

                        APFloat::rmNearestTiesToEven, &lost);
    uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      setImmKindMandatoryLiteral();
      setImmKindLiteral();

                             AsmParser->hasInv2PiInlineImm())) {
    setImmKindLiteral();

    if (!AsmParser->has64BitLiterals() || getModifiers().Lit)
    setImmKindLiteral();

    if (!AsmParser->has64BitLiterals()) {
      Val = static_cast<uint64_t>(Val) << 32;
    if (getModifiers().Lit ||
      Val = static_cast<uint64_t>(Val) << 32;
    setImmKindLiteral();

    setImmKindLiteral();

                             AsmParser->hasInv2PiInlineImm())) {
    setImmKindLiteral();

                             AsmParser->hasInv2PiInlineImm())) {
    setImmKindLiteral();

                             AsmParser->hasInv2PiInlineImm()));
                             AsmParser->hasInv2PiInlineImm()));

    setImmKindMandatoryLiteral();
    setImmKindMandatoryLiteral();
    setImmKindMandatoryLiteral();

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {

bool AMDGPUOperand::isInlineValue() const {
void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {

  if (Is == IS_VGPR) {
      return AMDGPU::VGPR_32RegClassID;
      return AMDGPU::VReg_64RegClassID;
      return AMDGPU::VReg_96RegClassID;
      return AMDGPU::VReg_128RegClassID;
      return AMDGPU::VReg_160RegClassID;
      return AMDGPU::VReg_192RegClassID;
      return AMDGPU::VReg_224RegClassID;
      return AMDGPU::VReg_256RegClassID;
      return AMDGPU::VReg_288RegClassID;
      return AMDGPU::VReg_320RegClassID;
      return AMDGPU::VReg_352RegClassID;
      return AMDGPU::VReg_384RegClassID;
      return AMDGPU::VReg_512RegClassID;
      return AMDGPU::VReg_1024RegClassID;
  } else if (Is == IS_TTMP) {
      return AMDGPU::TTMP_32RegClassID;
      return AMDGPU::TTMP_64RegClassID;
      return AMDGPU::TTMP_128RegClassID;
      return AMDGPU::TTMP_256RegClassID;
      return AMDGPU::TTMP_512RegClassID;
  } else if (Is == IS_SGPR) {
      return AMDGPU::SGPR_32RegClassID;
      return AMDGPU::SGPR_64RegClassID;
      return AMDGPU::SGPR_96RegClassID;
      return AMDGPU::SGPR_128RegClassID;
      return AMDGPU::SGPR_160RegClassID;
      return AMDGPU::SGPR_192RegClassID;
      return AMDGPU::SGPR_224RegClassID;
      return AMDGPU::SGPR_256RegClassID;
      return AMDGPU::SGPR_288RegClassID;
      return AMDGPU::SGPR_320RegClassID;
      return AMDGPU::SGPR_352RegClassID;
      return AMDGPU::SGPR_384RegClassID;
      return AMDGPU::SGPR_512RegClassID;
  } else if (Is == IS_AGPR) {
      return AMDGPU::AGPR_32RegClassID;
      return AMDGPU::AReg_64RegClassID;
      return AMDGPU::AReg_96RegClassID;
      return AMDGPU::AReg_128RegClassID;
      return AMDGPU::AReg_160RegClassID;
      return AMDGPU::AReg_192RegClassID;
      return AMDGPU::AReg_224RegClassID;
      return AMDGPU::AReg_256RegClassID;
      return AMDGPU::AReg_288RegClassID;
      return AMDGPU::AReg_320RegClassID;
      return AMDGPU::AReg_352RegClassID;
      return AMDGPU::AReg_384RegClassID;
      return AMDGPU::AReg_512RegClassID;
      return AMDGPU::AReg_1024RegClassID;
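// The StringSwitch chain below maps the architectural register names accepted
// in assembly source (exec, vcc, flat_scratch, m0, null, ...) onto their
// MCRegister enumerators.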
      .Case("exec", AMDGPU::EXEC)
      .Case("vcc", AMDGPU::VCC)
      .Case("flat_scratch", AMDGPU::FLAT_SCR)
      .Case("xnack_mask", AMDGPU::XNACK_MASK)
      .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
      .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
      .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("lds_direct", AMDGPU::LDS_DIRECT)
      .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
      .Case("m0", AMDGPU::M0)
      .Case("vccz", AMDGPU::SRC_VCCZ)
      .Case("src_vccz", AMDGPU::SRC_VCCZ)
      .Case("execz", AMDGPU::SRC_EXECZ)
      .Case("src_execz", AMDGPU::SRC_EXECZ)
      .Case("scc", AMDGPU::SRC_SCC)
      .Case("src_scc", AMDGPU::SRC_SCC)
      .Case("tba", AMDGPU::TBA)
      .Case("tma", AMDGPU::TMA)
      .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
      .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
      .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
      .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
      .Case("vcc_lo", AMDGPU::VCC_LO)
      .Case("vcc_hi", AMDGPU::VCC_HI)
      .Case("exec_lo", AMDGPU::EXEC_LO)
      .Case("exec_hi", AMDGPU::EXEC_HI)
      .Case("tma_lo", AMDGPU::TMA_LO)
      .Case("tma_hi", AMDGPU::TMA_HI)
      .Case("tba_lo", AMDGPU::TBA_LO)
      .Case("tba_hi", AMDGPU::TBA_HI)
      .Case("pc", AMDGPU::PC_REG)
      .Case("null", AMDGPU::SGPR_NULL)
bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R)
    return true;
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();

bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);

ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                              SMLoc &EndLoc) {
  bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();

bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind,
                                            MCRegister Reg1, SMLoc Loc) {
  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
    Reg = AMDGPU::FLAT_SCR;
  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
    Reg = AMDGPU::XNACK_MASK;
  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
    Error(Loc, "register does not fit in the list");
  if (Reg1 != Reg + RegWidth / 32) {
    Error(Loc, "registers in a list must have consecutive indices");

  {{"ttmp"}, IS_TTMP},

  return Kind == IS_VGPR ||

  if (Str.starts_with(Reg.Name))

  return !Str.getAsInteger(10, Num);

AMDGPUAsmParser::isRegister(const AsmToken &Token,
                            const AsmToken &NextToken) const {
  StringRef RegSuffix = Str.substr(RegName.size());
  if (!RegSuffix.empty()) {

AMDGPUAsmParser::isRegister()
  return isRegister(getToken(), peekToken());
MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
                                          unsigned SubReg, unsigned RegWidth,

  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return MCRegister();

  unsigned RegIdx = RegNum / AlignSize;
    Error(Loc, "invalid or unsupported register size");
    return MCRegister();

  const MCRegisterClass RC = TRI->getRegClass(RCID);
  if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;

  if (RegKind == IS_VGPR && !isGFX1250() && RegIdx + RegWidth / 32 > 256) {
    Error(Loc, "register index is out of range");
    return MCRegister();

bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
  int64_t RegLo, RegHi;

  SMLoc FirstIdxLoc = getLoc();
    SecondIdxLoc = getLoc();

    Error(FirstIdxLoc, "invalid register index");
    Error(SecondIdxLoc, "invalid register index");

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");

  if (RegHi == RegLo) {
    StringRef RegSuffix = getTokenStr();
    if (RegSuffix == ".l") {
    } else if (RegSuffix == ".h") {

  Num = static_cast<unsigned>(RegLo);
  RegWidth = 32 * ((RegHi - RegLo) + 1);

MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                            SmallVectorImpl<AsmToken> &Tokens) {
  RegKind = IS_SPECIAL;

MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                            SmallVectorImpl<AsmToken> &Tokens) {
  StringRef RegName = getTokenStr();
  auto Loc = getLoc();
    Error(Loc, "invalid register name");
    return MCRegister();

  unsigned SubReg = NoSubRegister;
  if (!RegSuffix.empty()) {
      Error(Loc, "invalid register index");
      return MCRegister();

    if (!ParseRegRange(RegNum, RegWidth, SubReg))
      return MCRegister();

  return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3132MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3133 unsigned &RegNum,
unsigned &RegWidth,
3134 SmallVectorImpl<AsmToken> &Tokens) {
3136 auto ListLoc = getLoc();
3139 "expected a register or a list of registers")) {
3140 return MCRegister();
3145 auto Loc = getLoc();
3146 if (!ParseAMDGPURegister(RegKind,
Reg, RegNum, RegWidth))
3147 return MCRegister();
3148 if (RegWidth != 32) {
3149 Error(Loc,
"expected a single 32-bit register");
3150 return MCRegister();
3154 RegisterKind NextRegKind;
3156 unsigned NextRegNum, NextRegWidth;
3159 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3160 NextRegNum, NextRegWidth,
3162 return MCRegister();
3164 if (NextRegWidth != 32) {
3165 Error(Loc,
"expected a single 32-bit register");
3166 return MCRegister();
3168 if (NextRegKind != RegKind) {
3169 Error(Loc,
"registers in a list must be of the same kind");
3170 return MCRegister();
3172 if (!AddNextRegisterToList(
Reg, RegWidth, RegKind, NextReg, Loc))
3173 return MCRegister();
3177 "expected a comma or a closing square bracket")) {
3178 return MCRegister();
3182 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
                                          MCRegister &Reg, unsigned &RegNum,
                                          unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  // ...
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
  // ...
    Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  // ...
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  // ...
    assert(Parser.hasPendingError());
  // ...
  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    // ...
              " register not available on this GPU");
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
                                          MCRegister &Reg, unsigned &RegNum,
                                          unsigned &RegWidth,
                                          bool RestoreOnFailure) {
  // ...
  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        // ...
      }
    }
  }
  // ...
}
std::optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  // ...
    return StringRef(".amdgcn.next_free_vgpr");
  // ...
    return StringRef(".amdgcn.next_free_sgpr");
  // ...
  return std::nullopt;
}
void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  // ...
}
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // ...
  auto SymbolName = getGprCountSymbolName(RegKind);
  // ...
  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
  // ...
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  // ...
                  ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
  // ...
  if (OldCount <= NewMax)
    // ...
}
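// Worked example of the bookkeeping above: after parsing v[8:11],
// DwordRegIndex = 8 and RegWidth = 128, so NewMax = 8 + divideCeil(128, 32) - 1
// = 11, and .amdgcn.next_free_vgpr is raised whenever its old value is <=
// NewMax (illustrative; the exact new value is assigned in the elided code).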
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  // ...
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  MCRegister Reg;
  unsigned RegNum, RegWidth;
  // ...
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    // ...
  }
  // ...
  if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
    // ...
  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}
ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
                                      bool HasSP3AbsModifier, bool HasLit,
                                      bool HasLit64) {
  // ...
  if (isRegister() || isModifier())
    // ...

  if (!HasLit && !HasLit64) {
    HasLit64 = trySkipId("lit64");
    HasLit = !HasLit64 && trySkipId("lit");
    if (HasLit || HasLit64) {
      // ...
      ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit, HasLit64);
      // ...
    }
  }
  // ...
  const auto &NextTok = peekToken();
  // ...
  bool Negate = false;
  // ...
  AMDGPUOperand::Modifiers Mods;
  // ...
  Mods.Lit64 = HasLit64;
  // ...
  StringRef Num = getTokenStr();
  // ...
  APFloat RealVal(APFloat::IEEEdouble());
  auto roundMode = APFloat::rmNearestTiesToEven;
  if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
    // ...
    RealVal.changeSign();
  // ...
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                               AMDGPUOperand::ImmTyNone, true));
  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
  Op.setModifiers(Mods);
  // ...
  if (HasSP3AbsModifier) {
    // ...
    if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
      // ...
  } else {
    if (Parser.parseExpression(Expr))
      // ...
  }
  // ...
  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  } else {
    if (HasLit || HasLit64)
      // ...
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  }
  // ...
}
ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  // ...
  if (auto R = parseRegister()) {
    // ...
  }
  // ...
}

ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
                                           bool HasSP3AbsMod, bool HasLit,
                                           bool HasLit64) {
  ParseStatus Res = parseReg(Operands);
  // ...
  return parseImm(Operands, HasSP3AbsMod, HasLit, HasLit64);
}
bool AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token,
                                             const AsmToken &NextToken) const {
  // ...
  return str == "abs" || str == "neg" || str == "sext";
}

bool AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token,
                                              const AsmToken &NextToken) const {
  // ...
}

bool AMDGPUAsmParser::isOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}

bool AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token,
                                             const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}

bool AMDGPUAsmParser::isModifier() {
  // ...
  AsmToken NextToken[2];
  peekTokens(NextToken);
  // ...
  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) &&
          isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}
bool AMDGPUAsmParser::parseSP3NegModifier() {
  // ...
  AsmToken NextToken[2];
  peekTokens(NextToken);
  // ...
      (isRegister(NextToken[0], NextToken[1]) ||
       // ...
       isId(NextToken[0], "abs"))) {
    // ...
  }
  // ...
}
ParseStatus
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  // ...
    return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
  // ...
  SP3Neg = parseSP3NegModifier();
  // ...
  Neg = trySkipId("neg");
  // ...
    return Error(Loc, "expected register or immediate");
  // ...
  Abs = trySkipId("abs");
  // ...
  Lit64 = trySkipId("lit64");
  // ...
    if (!has64BitLiterals())
      return Error(Loc, "lit64 is not supported on this GPU");
  // ...
  Lit = !Lit64 && trySkipId("lit");
  // ...
    return Error(Loc, "expected register or immediate");
  // ...
  Res = parseRegOrImm(Operands, SP3Abs, Lit, Lit64);
  // ...
    return (SP3Neg || Neg || SP3Abs || Abs || Lit || Lit64)
           // ...

  if ((Lit || Lit64) && !Operands.back()->isImm())
    Error(Loc, "expected immediate with lit modifier");

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    // ...
  if ((Lit || Lit64) &&
      // ...

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;
  // ...
  if (Mods.hasFPModifiers() || Lit || Lit64) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    // ...
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
  }
  // ...
}
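// Illustrative operand-modifier spellings accepted by the parsers above (both
// the named and the SP3 forms; example operands only):
//   abs(v1), |v1|        ; absolute value
//   neg(v2), -v2         ; negation
//   sext(v3)             ; integer sign extension
//   lit(1.0), lit64(...) ; force a (64-bit) literal encoding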
ParseStatus
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    // ...

  AMDGPUOperand::Modifiers Mods;
  // ...
  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    // ...
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
  }
  // ...
}
ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}

ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}

ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));
    // ...
  }
  // ...
  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  // ...
}
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  // ...
    return Match_InvalidOperand;
  // ...
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // ...
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    // ...
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }
  // ...
  if (tryAnotherVOPDEncoding(Inst))
    return Match_InvalidOperand;

  return Match_Success;
}
  static const unsigned Variants[] = {
      // ...
  };
  // ...

ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (isForcedDPP() && isForcedVOP3()) {
    // ...
  }
  if (getForcedEncodingSize() == 32) {
    // ...
  }
  if (isForcedVOP3()) {
    // ...
  }
  if (isForcedSDWA()) {
    // ...
  }
  if (isForcedDPP()) {
    // ...
  }
  // ...
}

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
    // ...
  if (getForcedEncodingSize() == 32)
    // ...
  // ...
}
unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  // ...
  switch (// ...
  case AMDGPU::FLAT_SCR:
  // ...
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
    // ...
  }
  return AMDGPU::NoRegister;
}

bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  // ...
  int64_t Val = MO.getImm();
  // ...
}
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  // ...
  switch (Opcode) {
  // ...
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    // ...
  }
}
static OperandIndices
getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals = false) {
  // ...
  int ImmIdx =
      AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
  // ...
  int ImmXIdx =
      AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
  // ...
  return {getNamedOperandIdx(Opcode, OpName::src0X),
          getNamedOperandIdx(Opcode, OpName::vsrc1X),
          getNamedOperandIdx(Opcode, OpName::vsrc2X),
          getNamedOperandIdx(Opcode, OpName::src0Y),
          getNamedOperandIdx(Opcode, OpName::vsrc1Y),
          getNamedOperandIdx(Opcode, OpName::vsrc2Y),
          // ...

  return {getNamedOperandIdx(Opcode, OpName::src0),
          getNamedOperandIdx(Opcode, OpName::src1),
          getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
}
bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  // ...
    return !isInlineConstant(Inst, OpIdx);
  // ...
    return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
  // ...
}

// ...
  const unsigned Opcode = Inst.getOpcode();
  if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
    // ...
  if (!LaneSelOp.isReg())
    // ...
  return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
// ...
bool AMDGPUAsmParser::validateConstantBusLimitations(
    const MCInst &Inst, const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  MCRegister LastSGPR;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  if (!(Desc.TSFlags &
        // ...

  SmallDenseSet<unsigned> SGPRsUsed;
  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed != AMDGPU::NoRegister) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;
  }
  // ...
  for (int OpIdx : OpIndices) {
    // ...
    if (usesConstantBus(Inst, OpIdx)) {
      // ...
      if (SGPRsUsed.insert(LastSGPR).second) {
        ++ConstantBusUseCount;
      }
      // ...
      if (NumLiterals == 0) {
        // ...
      } else if (LiteralSize != Size) {
        // ...
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    // ...

  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  // ...
  Error(Loc, "invalid operand (violates constant bus restrictions)");
  // ...
}
std::optional<unsigned>
AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
  const unsigned Opcode = Inst.getOpcode();
  // ...
  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
    const MCOperand &Opr = Inst.getOperand(OperandIdx);
    // ...
  };

  bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
  // ...
  for (auto OpName : {OpName::src0X, OpName::src0Y}) {
    int I = getNamedOperandIdx(Opcode, OpName);
    // ...
    int64_t Imm = Op.getImm();
    // ...
  }
  // ...
  for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
                      OpName::vsrc2Y, OpName::imm}) {
    int I = getNamedOperandIdx(Opcode, OpName);
    // ...
  }
  // ...
  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
      getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
  // ...
  return InvalidCompOprIdx;
}
bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
                                   const OperandVector &Operands) {
  // ...
  for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
    AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
    if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
        // ...
      Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
    // ...
  }

  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
  if (!InvalidCompOprIdx.has_value())
    // ...

  auto CompOprIdx = *InvalidCompOprIdx;
  // ...
  auto ParsedIdx =
      std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
  // ...
  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
    // ...
      Error(Loc, "dst registers must be distinct");
    // ...
      Error(Loc, "one dst register must be even and the other odd");
  } else {
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
    Error(Loc, Twine("src") + Twine(CompSrcIdx) +
                   " operands must use different VGPR banks");
  }
  // ...
}
bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
  // ...
  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
  if (!InvalidCompOprIdx.has_value())
    // ...
  InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
  if (InvalidCompOprIdx.has_value()) {
    // ...
    if (*InvalidCompOprIdx == VOPD::Component::DST)
      // ...
  }
  // ...
}

bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  // ...
  for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
                      OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
                      OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
    int I = getNamedOperandIdx(Opcode, OpName);
    // ...
  }
  // ...
  return !tryVOPD3(Inst);
}

bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  // ...
    return tryVOPD(Inst);
  return tryVOPD3(Inst);
}
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
  // ...
  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
  // ...
}
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {
  // ...
  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
  // ...
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
  // ...
  bool IsPackedD16 = false;
  // ...
  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
  IsPackedD16 = D16Idx >= 0;
  // ...
  if ((VDataSize / 4) == DataSize + TFESize)
    // ...

  // ...
    Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
  // ...
    Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
  // ...
  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
  // ...
}
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {
  // ...
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      // ...
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
  // ...
                            ? AMDGPU::OpName::srsrc
                            : AMDGPU::OpName::rsrc;
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
  // ...
  assert(SrsrcIdx > VAddr0Idx);
  // ...
  if (BaseOpcode->BVH) {
    if (IsA16 == BaseOpcode->A16)
      // ...
    Error(IDLoc, "image address size does not match a16");
    // ...
  }
  // ...
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx
            // ...
  unsigned ExpectedAddrSize =
      // ...
  if (hasPartialNSAEncoding() &&
      // ...
    int VAddrLastIdx = SrsrcIdx - 1;
    unsigned VAddrLastSize =
        // ...
    ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
  }
  // ...
  if (ExpectedAddrSize > 12)
    ExpectedAddrSize = 16;
  // ...
  if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
    // ...
  if (ActualAddrSize == ExpectedAddrSize)
    // ...
  Error(IDLoc, "image address size does not match dim and a16");
  // ...
}
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
  // ...
  if (!Desc.mayLoad() || !Desc.mayStore())
    // ...
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  // ...
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
}

bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
  // ...
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  // ...
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}
bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
                                      const OperandVector &Operands) {
  // ...
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    // ...
  }
  // ...
}

bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
  // ...
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      // ...
  if (!BaseOpcode->MSAA)
    // ...
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  // ...
  return DimInfo->MSAA;
}
  // ...
  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
    // ...

bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
                                      const OperandVector &Operands) {
  // ...
  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  // ...
  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
  // ...
  Error(ErrLoc, "source operand must be a VGPR");
  // ...
}
bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
                                          const OperandVector &Operands) {
  // ...
  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
    // ...
  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  // ...
  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
  // ...
          "source operand must be either a VGPR or an inline constant");
  // ...
}

bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
                                      const OperandVector &Operands) {
  // ...
  const MCInstrDesc &Desc = MII.get(Opcode);
  // ...
      !getFeatureBits()[FeatureMFMAInlineLiteralBug])
    // ...
  const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
  // ...
  if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
    // ...
          "inline constants are not allowed for this operand");
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  // ...
  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  if (BlgpIdx != -1) {
    if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
      int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
      // ...
      int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
      // ...
              "wrong register tuple size for cbsz value " + Twine(CBSZ));
      // ...
      int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
      // ...
              "wrong register tuple size for blgp value " + Twine(BLGP));
      // ...
    }
  }
  // ...
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
  // ...
  const MCOperand &Src2 = Inst.getOperand(Src2Idx);
  // ...
  MCRegister Src2Reg = Src2.getReg();
  // ...
  if (Src2Reg == DstReg)
    // ...
  if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
    // ...
  if (TRI->regsOverlap(Src2Reg, DstReg)) {
    // ...
          "source 2 operand must not partially overlap with dst");
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
  // ...
  switch (Inst.getOpcode()) {
  // ...
  case V_DIV_SCALE_F32_gfx6_gfx7:
  case V_DIV_SCALE_F32_vi:
  case V_DIV_SCALE_F32_gfx10:
  case V_DIV_SCALE_F64_gfx6_gfx7:
  case V_DIV_SCALE_F64_vi:
  case V_DIV_SCALE_F64_gfx10:
    // ...
  }
  // ...
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src2_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
  // ...
  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
  // ...
}

bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
  // ...
  int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
  // ...
}
  // ...
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    // ...
std::optional<StringRef>
AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  // ...
  if ((Desc.TSFlags & Enc) == 0)
    return std::nullopt;

  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
    // ...
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
      // ...
        return StringRef("lds_direct is not supported on this GPU");
      // ...
        return StringRef("lds_direct cannot be used with this instruction");
      if (SrcName != OpName::src0)
        return StringRef("lds_direct may be used as src0 only");
    }
  }

  return std::nullopt;
}
// ...
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isFlatOffset())
      return Op.getStartLoc();
  }
  // ...
bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
                                     const OperandVector &Operands) {
  // ...
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  // ...
    return validateFlatOffset(Inst, Operands);
  // ...
    return validateSMEMOffset(Inst, Operands);
  // ...
  {
    const unsigned OffsetSize = 24;
    if (!isUIntN(OffsetSize - 1, Op.getImm())) {
      // ...
            Twine("expected a ") + Twine(OffsetSize - 1) +
                "-bit unsigned offset for buffer ops");
      // ...
    }
  }
  // ...
  {
    const unsigned OffsetSize = 16;
    if (!isUIntN(OffsetSize, Op.getImm())) {
      // ...
            Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
      // ...
    }
  }
  // ...
}
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  // ...
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    // ...
          "flat offset modifier is not supported on this GPU");
    // ...
  }
  // ...
  bool AllowNegative =
      // ...
  if (!isIntN(OffsetSize, Op.getImm()) ||
      (!AllowNegative && Op.getImm() < 0)) {
    // ...
          Twine("expected a ") +
              (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
                             : Twine(OffsetSize - 1) + "-bit unsigned offset"));
    // ...
  }
  // ...
}
// ...
  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
      return Op.getStartLoc();
  }
  // ...
bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  // ...
            ? "expected a 23-bit unsigned offset for buffer ops"
        : isGFX12Plus()          ? "expected a 24-bit signed offset"
        : (isVI() || IsBuffer)   ? "expected a 20-bit unsigned offset"
                                 : "expected a 21-bit signed offset");
  // ...
}
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
  // ...
  const MCInstrDesc &Desc = MII.get(Opcode);
  // ...
  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  // ...
  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  // ...
  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      break;
    // ...
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      uint64_t Value = static_cast<uint64_t>(MO.getImm());
      if (NumLiterals == 0 || LiteralValue != Value) {
        // ...
      }
    } else if (MO.isExpr()) {
      // ...
    }
  }

  return NumLiterals + NumExprs <= 1;
}
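// Illustrative (example mnemonics): SOP instructions can encode at most one
// unique literal, so
//   s_add_u32 s0, 0x12345678, 0x12345678   ; same literal value -> accepted
//   s_add_u32 s0, 0x12345678, 0x9abcdef0   ; two different literals -> rejected
// Inline constants do not count toward the limit, per the
// !isInlineConstant() check above.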
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  // ...
  {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    // ...
  }
  // ...
  {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    if (OpSelIdx != -1) {
      // ...
    }
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
    if (OpSelHiIdx != -1) {
      // ...
    }
  }
  // ...
  {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    // ...
  }
  // ...
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  // ...
  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
  const MCOperand &Src1 = Inst.getOperand(Src1Idx);
  // ...
  auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
    // ...
    return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
  };
  // ...
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
  if (Src2Idx != -1) {
    const MCOperand &Src2 = Inst.getOperand(Src2Idx);
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
  if (!hasTrue16Insts())
    // ...
  const MCRegisterInfo *MRI = getMRI();
  // ...
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  // ...
  if (OpSelOpValue == 0)
    // ...
  unsigned OpCount = 0;
  for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
    // ...
        MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
      // ...
      bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
      if (OpSelOpIsHi != VGPRSuffixIsHi)
        // ...
    }
  }
  // ...
}
bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
  assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
  // ...
  int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
  // ...
  const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
                                     AMDGPU::OpName::src1_modifiers,
                                     AMDGPU::OpName::src2_modifiers};
  // ...
  for (unsigned i = 0; i < 3; ++i) {
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
                                  const OperandVector &Operands) {
  // ...
  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
  if (DppCtrlIdx >= 0) {
    // ...
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
      Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
                         : "DP ALU dpp only supports row_newbcast");
    // ...
  }

  int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
  bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
  // ...
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
  // ...
  const MCOperand &Src1 = Inst.getOperand(Src1Idx);
  // ...
      Error(S, "invalid operand for instruction");
  // ...
          "src1 immediate operand invalid for instruction");
  // ...
}
bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
  auto FB = getFeatureBits();
  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
         (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
}
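// Illustrative (example instruction only; the check itself just compares the
// register): in wave64 mode the condition/carry operand must be the full vcc
// pair, while wave32 code must use vcc_lo:
//   v_cndmask_b32 v0, v1, v2, vcc      ; wave64
//   v_cndmask_b32 v0, v1, v2, vcc_lo   ; wave32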
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  const MCInstrDesc &Desc = MII.get(Opcode);
  bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
  // ...
      !HasMandatoryLiteral && !isVOPD(Opcode))
    // ...

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  // ...
  for (int OpIdx : OpIndices) {
    // ...
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      uint64_t Value = static_cast<uint64_t>(MO.getImm());
      // ...
                     HasMandatoryLiteral);
      // ...
          !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
        Error(getLitLoc(Operands), "invalid operand for instruction");
        // ...
      }

      if (IsFP64 && IsValid32Op && !IsForcedFP64)
        // ...

      if (NumLiterals == 0 || LiteralValue != Value) {
        // ...
      }
    } else if (MO.isExpr()) {
      // ...
    }
  }
  NumLiterals += NumExprs;
  // ...
  if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    // ...
  }

  if (NumLiterals > 1) {
    Error(getLitLoc(Operands, true),
          "only one unique literal operand is allowed");
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  // ...
                        ? AMDGPU::OpName::data0
                        : AMDGPU::OpName::vdata;
  // ...
  const MCRegisterInfo *MRI = getMRI();
  // ...
  if (Data2Areg >= 0 && Data2Areg != DataAreg)
    // ...

  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    if (DataAreg < 0 || DstAreg < 0)
      // ...
    return DstAreg == DataAreg;
  }

  return DstAreg < 1 && DataAreg < 1;
}
bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
    // ...
  const MCRegisterInfo *MRI = getMRI();
  // ...
  if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
    // ...
  if (FB[AMDGPU::FeatureGFX1250Insts]) {
    switch (Opc) {
    // ...
    case AMDGPU::DS_LOAD_TR6_B96:
    case AMDGPU::DS_LOAD_TR6_B96_gfx12:
      // ...
    case AMDGPU::GLOBAL_LOAD_TR6_B96:
    case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
      // ...
      int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
      if (VAddrIdx != -1) {
        // ...
        MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
        if ((Sub - AMDGPU::VGPR0) & 1)
          // ...
      }
      // ...
    }
    case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
    case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
      // ...
    }
  }
  // ...
  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
  // ...
  MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  // ...
}
// ...
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    // ...
      return Op.getStartLoc();
  }
  // ...
bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
                                   const OperandVector &Operands) {
  // ...
  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  // ...
  SMLoc BLGPLoc = getBLGPLoc(Operands);
  // ...
  bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
  auto FB = getFeatureBits();
  bool UsesNeg = false;
  if (FB[AMDGPU::FeatureGFX940Insts]) {
    switch (Opc) {
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
      // ...
    }
  }

  if (IsNeg == UsesNeg)
    // ...

  // ...
        UsesNeg ? "invalid modifier: blgp is not supported"
                : "invalid modifier: neg is not supported");
  // ...
}
bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
                                      const OperandVector &Operands) {
  // ...
  if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
    // ...

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
  // ...
  if (Reg == AMDGPU::SGPR_NULL)
    // ...
  Error(RegLoc, "src0 must be null");
  // ...
}
bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
                                 const OperandVector &Operands) {
  // ...
    return validateGWS(Inst, Operands);
  // ...
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
  // ...
  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
  Error(S, "gds modifier is not supported on this GPU");
  // ...
}
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
                                  const OperandVector &Operands) {
  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
    // ...
  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
    // ...

  const MCRegisterInfo *MRI = getMRI();
  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
  // ...
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
  // ...
  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
  // ...
  Error(RegLoc, "vgpr must be even aligned");
  // ...
}
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            const SMLoc &IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  // ...
  {
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    // ...
    Error(S, "scale_offset is not supported on this GPU");
    // ...
  }
  {
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    // ...
    Error(S, "nv is not supported on this GPU");
    // ...
  }
  {
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    // ...
    Error(S, "scale_offset is not supported for this instruction");
    // ...
  }
  // ...
    return validateTHAndScopeBits(Inst, Operands, CPol);
  // ...
  {
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    Error(S, "cache policy is not supported for SMRD instructions");
    // ...
  }
  // ...
    Error(IDLoc, "invalid cache policy for SMEM instruction");
  // ...
  if (!(TSFlags & AllowSCCModifier)) {
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    // ...
          "scc modifier is not supported for this instruction on this GPU");
    // ...
  }
  // ...
            : "instruction must use glc");
  // ...
  {
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    // ...
        &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
    // ...
            : "instruction must not use glc");
  }
  // ...
}
bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
                                             const OperandVector &Operands,
                                             const unsigned CPol) {
  // ...
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &TID = MII.get(Opcode);
  // ...
  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
  // ...
    return PrintError("instruction must use th:TH_ATOMIC_RETURN");
  // ...
    return PrintError("invalid th value for SMEM instruction");
  // ...
    return PrintError("scope and th combination is not valid");
  // ...
    return PrintError("invalid th value for atomic instructions");
  // ...
    return PrintError("invalid th value for store instructions");
  // ...
    return PrintError("invalid th value for load instructions");
  // ...
}
bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
                                  const OperandVector &Operands) {
  // ...
  if (Desc.mayStore() &&
      // ...
    SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
    // ...
    Error(Loc, "TFE modifier has no meaning for store instructions");
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::validateSetVgprMSB(const MCInst &Inst,
                                         const OperandVector &Operands) {
  if (Inst.getOpcode() != AMDGPU::S_SET_VGPR_MSB_gfx12)
    // ...
  // ...
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::simm16);
  // ...
  SMLoc Loc = Operands[1]->getStartLoc();
  Error(Loc, "s_set_vgpr_msb accepts values in range [0..255]");
  // ...
}
bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  // ...
  auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
    int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
    // ...
    int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
    // ...
        TRI->getRegClass(Desc.operands()[SrcIdx].RegClass).getSizeInBits();
    // ...
    static const char *FmtNames[] = {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
                                     "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
                                     // ...
    // ...
          "wrong register tuple size for " + Twine(FmtNames[Fmt]));
    // ...
  };

  return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
         validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
}
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                          const SMLoc &IDLoc,
                                          const OperandVector &Operands) {
  if (auto ErrMsg = validateLdsDirect(Inst)) {
    // ...
  }
  if (!validateTrue16OpSel(Inst)) {
    // ...
          "op_sel operand conflicts with 16-bit operand suffix");
    // ...
  }
  if (!validateSOPLiteral(Inst)) {
    // ...
          "only one unique literal operand is allowed");
    // ...
  }
  if (!validateVOPLiteral(Inst, Operands)) {
    // ...
  }
  if (!validateConstantBusLimitations(Inst, Operands)) {
    // ...
  }
  if (!validateVOPD(Inst, Operands)) {
    // ...
  }
  if (!validateIntClampSupported(Inst)) {
    // ...
          "integer clamping is not supported on this GPU");
    // ...
  }
  if (!validateOpSel(Inst)) {
    // ...
          "invalid op_sel operand");
    // ...
  }
  if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
    // ...
          "invalid neg_lo operand");
    // ...
  }
  if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
    // ...
          "invalid neg_hi operand");
    // ...
  }
  if (!validateDPP(Inst, Operands)) {
    // ...
  }
  if (!validateMIMGD16(Inst)) {
    // ...
          "d16 modifier is not supported on this GPU");
    // ...
  }
  if (!validateMIMGDim(Inst, Operands)) {
    Error(IDLoc, "missing dim operand");
    // ...
  }
  if (!validateTensorR128(Inst)) {
    // ...
          "instruction must set modifier r128=0");
    // ...
  }
  if (!validateMIMGMSAA(Inst)) {
    // ...
          "invalid dim; must be MSAA type");
    // ...
  }
  if (!validateMIMGDataSize(Inst, IDLoc)) {
    // ...
  }
  if (!validateMIMGAddrSize(Inst, IDLoc))
    // ...
  if (!validateMIMGAtomicDMask(Inst)) {
    // ...
          "invalid atomic image dmask");
    // ...
  }
  if (!validateMIMGGatherDMask(Inst)) {
    // ...
          "invalid image_gather dmask: only one bit must be set");
    // ...
  }
  if (!validateMovrels(Inst, Operands)) {
    // ...
  }
  if (!validateOffset(Inst, Operands)) {
    // ...
  }
  if (!validateMAIAccWrite(Inst, Operands)) {
    // ...
  }
  if (!validateMAISrc2(Inst, Operands)) {
    // ...
  }
  if (!validateMFMA(Inst, Operands)) {
    // ...
  }
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
    // ...
  }
  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc,
          getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
              ? "invalid register class: data and dst should be all VGPR or AGPR"
              : "invalid register class: agpr loads and stores not supported on this GPU"
    );
    // ...
  }
  if (!validateVGPRAlign(Inst)) {
    // ...
          "invalid register class: vgpr tuples must be 64 bit aligned");
    // ...
  }
  // ...
  if (!validateBLGP(Inst, Operands)) {
    // ...
  }
  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
    // ...
  }
  if (!validateWaitCnt(Inst, Operands)) {
    // ...
  }
  if (!validateTFE(Inst, Operands)) {
    // ...
  }
  if (!validateSetVgprMSB(Inst, Operands)) {
    // ...
  }
  if (!validateWMMA(Inst, Operands)) {
    // ...
  }
  // ...
}
                               unsigned VariantID = 0);
// ...
                               unsigned VariantID);

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  // ...
}

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS,
                                       ArrayRef<unsigned> Variants) {
  for (auto Variant : Variants) {
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  const SMLoc &IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
  // ...
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    // ...
  getParser().clearPendingErrors();
  // ...
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    // ...
               " variant of this instruction is not supported"));
    // ...
  }
  // ...
  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
    // ...
    FeatureBitset FeaturesWS32 = getFeatureBits();
    FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
        .flip(AMDGPU::FeatureWavefrontSize32);
    FeatureBitset AvailableFeaturesWS32 =
        ComputeAvailableFeatures(FeaturesWS32);
    // ...
    if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
      return Error(IDLoc, "instruction requires wavesize=32");
    // ...
  }
  // ...
  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
    return Error(IDLoc, "instruction not supported on this GPU");
  }
  // ...
  return Error(IDLoc, "invalid instruction" + Suggestion);
}
// ...
  const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
  if (Op.isToken() && InvalidOprIdx > 1) {
    const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
    return PrevOp.isToken() && PrevOp.getToken() == "::";
  }
  // ...
bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  // ...
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    // ...
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  // ...
    // ...
    if (R == Match_Success || R == Match_MissingFeature ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
        (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
         Result != Match_MissingFeature)) {
      // ...
    }
    if (R == Match_Success)
      // ...
  }

  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      // ...
    }
    // ...
  }
  // ...
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    // ...
  }

  switch (Result) {
  // ...
  case Match_MissingFeature:
    // ...
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        // ...
    }
    // ...
      return Error(ErrorLoc, "invalid VOPDY instruction");
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_MnemonicFail:
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  int64_t Tmp;
  // ...
  if (getParser().parseAbsoluteExpression(Tmp)) {
    // ...
  }
  Ret = static_cast<uint32_t>(Tmp);
  // ...
}
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (!getSTI().getTargetTriple().isAMDGCN())
    return TokError("directive only supported for amdgcn architecture");

  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))
    // ...

  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(
        TargetRange.Start,
        (Twine(".amdgcn_target directive's target id ") +
         Twine(TargetIDDirective) +
         Twine(" does not match the specified target id ") +
         Twine(getTargetStreamer().getTargetID()->toString())).str());
  // ...
}
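// Illustrative directive usage (the target id string is an example only):
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"
// The quoted string must match the configured target id exactly, otherwise
// the mismatch error constructed above is reported.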
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  // ...
}
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, const MCExpr *VCCUsed,
    const MCExpr *FlatScrUsed, bool XNACKUsed,
    std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
    SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
    const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
  // ...
  const MCExpr *NumSGPRs = NextFreeSGPR;
  int64_t EvaluatedSGPRs;
  // ...
  unsigned MaxAddressableNumSGPRs =
      // ...
  if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
      !Features.test(FeatureSGPRInitBug) &&
      static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  const MCExpr *ExtraSGPRs =
      // ...
  if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
      (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
      static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  if (Features.test(FeatureSGPRInitBug))
    // ...

  auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
                                unsigned Granule) -> const MCExpr * {
    // ...
    const MCExpr *AlignToGPR =
        // ...
    const MCExpr *DivGPR =
        // ...
  };

  VGPRBlocks = GetNumGPRBlocks(
      // ...
  // ...
}
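// Illustrative arithmetic for GetNumGPRBlocks (assuming a VGPR allocation
// granule of 4): a kernel declaring 10 VGPRs is aligned up to 12 and divided
// by the granule, giving 3 blocks; the hardware field typically stores this
// count biased by minus one. The same align-then-divide shape is what the
// MCExpr built above encodes symbolically.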
6001bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
6002 if (!getSTI().getTargetTriple().isAMDGCN())
6003 return TokError(
"directive only supported for amdgcn architecture");
6006 return TokError(
"directive only supported for amdhsa OS");
6008 StringRef KernelName;
6009 if (getParser().parseIdentifier(KernelName))
6012 AMDGPU::MCKernelDescriptor KD =
6024 const MCExpr *NextFreeVGPR = ZeroExpr;
6026 const MCExpr *NamedBarCnt = ZeroExpr;
6027 uint64_t SharedVGPRCount = 0;
6028 uint64_t PreloadLength = 0;
6029 uint64_t PreloadOffset = 0;
6031 const MCExpr *NextFreeSGPR = ZeroExpr;
6034 unsigned ImpliedUserSGPRCount = 0;
6038 std::optional<unsigned> ExplicitUserSGPRCount;
6039 const MCExpr *ReserveVCC = OneExpr;
6040 const MCExpr *ReserveFlatScr = OneExpr;
6041 std::optional<bool> EnableWavefrontSize32;
6047 SMRange IDRange = getTok().getLocRange();
6048 if (!parseId(
ID,
"expected .amdhsa_ directive or .end_amdhsa_kernel"))
6051 if (
ID ==
".end_amdhsa_kernel")
6055 return TokError(
".amdhsa_ directives cannot be repeated");
6057 SMLoc ValStart = getLoc();
6058 const MCExpr *ExprVal;
6059 if (getParser().parseExpression(ExprVal))
6061 SMLoc ValEnd = getLoc();
6062 SMRange ValRange = SMRange(ValStart, ValEnd);
6065 uint64_t Val = IVal;
6066 bool EvaluatableExpr;
6067 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6069 return OutOfRangeError(ValRange);
6073#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6074 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6075 return OutOfRangeError(RANGE); \
6076 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6081#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6083 return Error(IDRange.Start, "directive should have resolvable expression", \
6086 if (
ID ==
".amdhsa_group_segment_fixed_size") {
6089 return OutOfRangeError(ValRange);
6091 }
else if (
ID ==
".amdhsa_private_segment_fixed_size") {
6094 return OutOfRangeError(ValRange);
6096 }
else if (
ID ==
".amdhsa_kernarg_size") {
6098 return OutOfRangeError(ValRange);
6100 }
else if (
ID ==
".amdhsa_user_sgpr_count") {
6102 ExplicitUserSGPRCount = Val;
6103 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_buffer") {
6107 "directive is not supported with architected flat scratch",
6110 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6113 ImpliedUserSGPRCount += 4;
6114 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_length") {
6117 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6120 return OutOfRangeError(ValRange);
6124 ImpliedUserSGPRCount += Val;
6125 PreloadLength = Val;
6127 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_offset") {
6130 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6133 return OutOfRangeError(ValRange);
6137 PreloadOffset = Val;
6138 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_ptr") {
6141 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6144 ImpliedUserSGPRCount += 2;
6145 }
else if (
ID ==
".amdhsa_user_sgpr_queue_ptr") {
6148 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6151 ImpliedUserSGPRCount += 2;
6152 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_segment_ptr") {
6155 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6158 ImpliedUserSGPRCount += 2;
6159 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_id") {
6162 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6165 ImpliedUserSGPRCount += 2;
6166 }
else if (
ID ==
".amdhsa_user_sgpr_flat_scratch_init") {
6169 "directive is not supported with architected flat scratch",
6173 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6176 ImpliedUserSGPRCount += 2;
6177 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_size") {
6180 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6183 ImpliedUserSGPRCount += 1;
6184 }
else if (
ID ==
".amdhsa_wavefront_size32") {
6186 if (IVersion.
Major < 10)
6187 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6188 EnableWavefrontSize32 = Val;
6190 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6192 }
else if (
ID ==
".amdhsa_uses_dynamic_stack") {
6194 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6196 }
else if (
ID ==
".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6199 "directive is not supported with architected flat scratch",
6202 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6204 }
else if (
ID ==
".amdhsa_enable_private_segment") {
6208 "directive is not supported without architected flat scratch",
6211 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6213 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_x") {
6215 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6217 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_y") {
6219 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6221 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_z") {
6223 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6225 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_info") {
6227 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6229 }
else if (
ID ==
".amdhsa_system_vgpr_workitem_id") {
6231 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6233 }
else if (
ID ==
".amdhsa_next_free_vgpr") {
6234 VGPRRange = ValRange;
6235 NextFreeVGPR = ExprVal;
6236 }
else if (
ID ==
".amdhsa_next_free_sgpr") {
6237 SGPRRange = ValRange;
6238 NextFreeSGPR = ExprVal;
6239 }
else if (
ID ==
".amdhsa_accum_offset") {
6241 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6242 AccumOffset = ExprVal;
6243 }
else if (
ID ==
".amdhsa_named_barrier_count") {
6245 return Error(IDRange.
Start,
"directive requires gfx1250+", IDRange);
6246 NamedBarCnt = ExprVal;
6247 }
else if (
ID ==
".amdhsa_reserve_vcc") {
6249 return OutOfRangeError(ValRange);
6250 ReserveVCC = ExprVal;
6251 }
else if (
ID ==
".amdhsa_reserve_flat_scratch") {
6252 if (IVersion.
Major < 7)
6253 return Error(IDRange.
Start,
"directive requires gfx7+", IDRange);
6256 "directive is not supported with architected flat scratch",
6259 return OutOfRangeError(ValRange);
6260 ReserveFlatScr = ExprVal;
6261 }
else if (
ID ==
".amdhsa_reserve_xnack_mask") {
6262 if (IVersion.
Major < 8)
6263 return Error(IDRange.
Start,
"directive requires gfx8+", IDRange);
6265 return OutOfRangeError(ValRange);
6266 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6267 return getParser().Error(IDRange.
Start,
".amdhsa_reserve_xnack_mask does not match target id",
6269 }
else if (
ID ==
".amdhsa_float_round_mode_32") {
6271 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6273 }
else if (
ID ==
".amdhsa_float_round_mode_16_64") {
6275 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6277 }
else if (
ID ==
".amdhsa_float_denorm_mode_32") {
6279 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6281 }
else if (
ID ==
".amdhsa_float_denorm_mode_16_64") {
6283 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6285 }
else if (
ID ==
".amdhsa_dx10_clamp") {
6286 if (IVersion.
Major >= 12)
6287 return Error(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
6289 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6291 }
else if (
ID ==
".amdhsa_ieee_mode") {
6292 if (IVersion.
Major >= 12)
6293 return Error(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
6295 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6297 }
else if (
ID ==
".amdhsa_fp16_overflow") {
6298 if (IVersion.
Major < 9)
6299 return Error(IDRange.
Start,
"directive requires gfx9+", IDRange);
6301 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6303 }
else if (
ID ==
".amdhsa_tg_split") {
6305 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6308 }
else if (
ID ==
".amdhsa_workgroup_processor_mode") {
6311 "directive unsupported on " + getSTI().
getCPU(), IDRange);
6313 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6315 }
else if (
ID ==
".amdhsa_memory_ordered") {
6316 if (IVersion.
Major < 10)
6317 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6319 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6321 }
else if (
ID ==
".amdhsa_forward_progress") {
6322 if (IVersion.
Major < 10)
6323 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6325 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6327 }
else if (
ID ==
".amdhsa_shared_vgpr_count") {
6329 if (IVersion.
Major < 10 || IVersion.
Major >= 12)
6330 return Error(IDRange.
Start,
"directive requires gfx10 or gfx11",
6332 SharedVGPRCount = Val;
6334 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6336 }
else if (
ID ==
".amdhsa_inst_pref_size") {
6337 if (IVersion.
Major < 11)
6338 return Error(IDRange.
Start,
"directive requires gfx11+", IDRange);
6339 if (IVersion.
Major == 11) {
6341 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6345 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6348 }
else if (
ID ==
".amdhsa_exception_fp_ieee_invalid_op") {
6351 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6353 }
else if (
ID ==
".amdhsa_exception_fp_denorm_src") {
6355 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6357 }
else if (
ID ==
".amdhsa_exception_fp_ieee_div_zero") {
6360 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6362 }
else if (
ID ==
".amdhsa_exception_fp_ieee_overflow") {
6364 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6366 }
else if (
ID ==
".amdhsa_exception_fp_ieee_underflow") {
6368 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6370 }
else if (
ID ==
".amdhsa_exception_fp_ieee_inexact") {
6372 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6374 }
else if (
ID ==
".amdhsa_exception_int_div_zero") {
6376 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6378 }
else if (
ID ==
".amdhsa_round_robin_scheduling") {
6379 if (IVersion.
Major < 12)
6380 return Error(IDRange.
Start,
"directive requires gfx12+", IDRange);
6382 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6385 return Error(IDRange.
Start,
"unknown .amdhsa_kernel directive", IDRange);
6388#undef PARSE_BITS_ENTRY
  if (!Seen.contains(".amdhsa_next_free_vgpr"))
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (!Seen.contains(".amdhsa_next_free_sgpr"))
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);

  if (PreloadLength) {
    // (Kernarg preload accounting elided in this listing.)
  }

  const MCExpr *VGPRBlocks;
  const MCExpr *SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
                         EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         SGPRBlocks))
    return true;

  int64_t EvaluatedVGPRBlocks;
  bool VGPRBlocksEvaluatable =
      VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
  if (VGPRBlocksEvaluatable &&
      !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
    return OutOfRangeError(VGPRRange);
  }
  KD.compute_pgm_rsrc1 = MCKernelDescriptor::bits_set(
      KD.compute_pgm_rsrc1, VGPRBlocks,
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());

  int64_t EvaluatedSGPRBlocks;
  if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
      !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          static_cast<uint64_t>(EvaluatedSGPRBlocks)))
    return OutOfRangeError(SGPRRange);
  KD.compute_pgm_rsrc1 = MCKernelDescriptor::bits_set(
      KD.compute_pgm_rsrc1, SGPRBlocks,
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());

  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError("amdgpu_user_sgpr_count smaller than implied by "
                    "enabled user SGPRs");

  // The user SGPR count lives in a wider field on gfx12.5; the exact target
  // check selecting between the two encodings is elided in this listing.
  if (isGFX1250()) {
    if (!isUInt<COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
      return TokError("too many user SGPRs enabled");
    KD.compute_pgm_rsrc2 = MCKernelDescriptor::bits_set(
        KD.compute_pgm_rsrc2,
        MCConstantExpr::create(UserSGPRCount, getContext()),
        COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
        COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
  } else {
    if (!isUInt<COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_WIDTH>(
            UserSGPRCount))
      return TokError("too many user SGPRs enabled");
    KD.compute_pgm_rsrc2 = MCKernelDescriptor::bits_set(
        KD.compute_pgm_rsrc2,
        MCConstantExpr::create(UserSGPRCount, getContext()),
        COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
        COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
  }

  int64_t IVal = 0;
  if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
    return TokError("Kernarg size should be resolvable");
  uint64_t kernarg_size = IVal;
  if (PreloadLength && kernarg_size &&
      (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
    return TokError("Kernarg preload length + offset is larger than the "
                    "kernarg segment size");
  if (isGFX90A()) {
    if (!Seen.contains(".amdhsa_accum_offset"))
      return TokError(".amdhsa_accum_offset directive is required");
    int64_t EvaluatedAccum;
    bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
    uint64_t UEvaluatedAccum = EvaluatedAccum;
    if (AccumEvaluatable &&
        (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
      return TokError("accum_offset should be in range [4..256] in "
                      "increments of 4");

    int64_t EvaluatedNumVGPR;
    if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
        AccumEvaluatable &&
        UEvaluatedAccum >
            alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
      return TokError("accum_offset exceeds total VGPR allocation");
    // The encoded (accum_offset / 4 - 1) expression is elided in this listing.
    KD.compute_pgm_rsrc3 = MCKernelDescriptor::bits_set(
        KD.compute_pgm_rsrc3, AccumOffset,
        COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
        COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
  }

  // (gfx12.5) COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT /
  // COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT are also written here; the
  // surrounding code is elided in this listing.

  if (IVersion.Major >= 10 && IVersion.Major < 12) {
    if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
      return TokError("shared_vgpr_count directive not valid on "
                      "wavefront size 32");
    }

    if (VGPRBlocksEvaluatable &&
        (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
         63)) {
      return TokError("shared_vgpr_count*2 + "
                      "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
                      "exceed 63");
    }
  }

  getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
                                                 NextFreeVGPR, NextFreeSGPR,
                                                 ReserveVCC, ReserveFlatScr);
  return false;
}
bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
  uint32_t Version;
  if (ParseAsAbsoluteExpression(Version))
    return true;

  getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
  return false;
}

bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               AMDGPUMCKernelCodeT &C) {
  // max_scratch_backing_memory_byte_size is deprecated and ignored.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
    return TokError(Err.str());
  }
  Lex();

  if (ID == "enable_wavefront_size32") {
    if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
      if (!isGFX10Plus())
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    } else {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
    }
  }

  if (ID == "wavefront_size") {
    if (C.wavefront_size == 5) {
      if (!isGFX10Plus())
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (C.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }

  return false;
}
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  AMDGPUMCKernelCodeT KernelCode;
  // (Initialization of KernelCode from the subtarget is elided in this
  // listing.)

  while (true) {
    StringRef ID;
    if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
      return true;

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, KernelCode))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  StringRef KernelName;
  if (!parseId(KernelName, "expected symbol name"))
    return true;

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (!getSTI().getTargetTriple().isAMDGCN()) {
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  auto TargetIDDirective = getLexer().getTok().getStringContents();
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return Error(getParser().getTok().getLoc(), "target id must match options");

  getTargetStreamer().EmitISAVersion();
  Lex();
  return false;
}
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  std::string HSAMetadataString;
  // Everything up to the matching end directive is collected into
  // HSAMetadataString via ParseToEndDirective (call elided in this listing).

  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
    return Error(getLoc(), "invalid HSA metadata");

  return false;
}

/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {
  raw_string_ostream CollectStream(CollectString);

  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!isToken(AsmToken::Eof)) {
    while (isToken(AsmToken::Space)) {
      CollectStream << getTokenStr();
      Lex();
    }

    if (trySkipId(AssemblerDirectiveEnd)) {
      FoundEnd = true;
      break;
    }

    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  getLexer().setSkipSpace(true);

  if (isToken(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  return false;
}
bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
  std::string String;
  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
                          AMDGPU::PALMD::AssemblerDirectiveEnd, String))
    return true;

  auto *PALMetadata = getTargetStreamer().getPALMetadata();
  if (!PALMetadata->setFromString(String))
    return Error(getLoc(), "invalid PAL metadata");
  return false;
}

/// Parse the assembler directive for old-style (register, value) PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                  "not available on non-amdpal OSes")).str());
  }

  auto *PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  for (;;) {
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (!trySkipToken(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    if (!trySkipToken(AsmToken::Comma))
      break;
  }
  return false;
}
/// ParseDirectiveAMDGPULDS
///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
    return true;

  StringRef Name;
  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");

  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
  if (getParser().parseComma())
    return true;

  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());

  int64_t Size;
  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return true;
  if (Size < 0)
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  int64_t Alignment = 4;
  if (trySkipToken(AsmToken::Comma)) {
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  if (parseEOL())
    return true;

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  return false;
}
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (isHsaAbi(getSTI())) {
    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    if (IDVal == ".amdhsa_code_object_version")
      return ParseDirectiveAMDHSACodeObjectVersion();

    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
      return Error(getLoc(),
                   (Twine(HSAMD::AssemblerDirectiveBegin) +
                    Twine(" directive is "
                          "not available on non-amdhsa OSes")).str());
    }
  }

  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           MCRegister Reg) {
  if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
    return isGFX9Plus();

  // GFX10+ has 2 more SGPRs 104 and 105.
  if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
    return hasSGPR104_SGPR105();

  switch (Reg.id()) {
  case SRC_SHARED_BASE_LO:
  case SRC_SHARED_BASE:
  case SRC_SHARED_LIMIT_LO:
  case SRC_SHARED_LIMIT:
  case SRC_PRIVATE_BASE_LO:
  case SRC_PRIVATE_BASE:
  case SRC_PRIVATE_LIMIT_LO:
  case SRC_PRIVATE_LIMIT:
    return isGFX9Plus();
  case SRC_FLAT_SCRATCH_BASE_LO:
  case SRC_FLAT_SCRATCH_BASE_HI:
    return hasGloballyAddressableScratch();
  case SRC_POPS_EXITING_WAVE_ID:
    return isGFX9Plus() && !isGFX10Plus();
  case XNACK_MASK:
  case XNACK_MASK_LO:
  case XNACK_MASK_HI:
    return (isVI() || isGFX9()) &&
           getTargetStreamer().getTargetID()->isXnackSupported();
  default:
    // (Remaining per-register cases elided in this listing.)
    break;
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more
  // that are reserved for flat_scr on newer targets.
  if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
    return hasSGPR102_SGPR103();

  return true;
}
  ParseStatus Res = parseVOPD(Operands);
  if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
    return Res;

  // Try to parse with a custom parser first.
  Res = MatchOperandParserImpl(Operands, Mnemonic);

  // (NSA register-list handling; intervening checks elided in this listing.)
  unsigned Prefix = Operands.size();
  SMLoc LBraceLoc = getLoc();
  SMLoc RBraceLoc;
  for (;;) {
    auto Loc = getLoc();
    Res = parseReg(Operands);
    if (Res.isNoMatch())
      Error(Loc, "expected a register");
    if (!Res.isSuccess())
      return ParseStatus::Failure;

    RBraceLoc = getLoc();
    if (trySkipToken(AsmToken::RBrac))
      break;

    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing square bracket"))
      return ParseStatus::Failure;
  }

  if (Operands.size() - Prefix > 1) {
    Operands.insert(Operands.begin() + Prefix,
                    AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
    Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
  }
  return ParseStatus::Success;
}
StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.consume_back("_e64_dpp")) {
    setForcedDPP(true);
    setForcedEncodingSize(64);
    return Name;
  }
  if (Name.consume_back("_e64")) {
    setForcedEncodingSize(64);
    return Name;
  }
  if (Name.consume_back("_e32")) {
    setForcedEncodingSize(32);
    return Name;
  }
  if (Name.consume_back("_dpp")) {
    setForcedDPP(true);
    return Name;
  }
  if (Name.consume_back("_sdwa")) {
    setForcedSDWA(true);
    return Name;
  }
  return Name;
}

static void applyMnemonicAliases(StringRef &Mnemonic,
                                 const FeatureBitset &Features,
                                 unsigned VariantID);

bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name, SMLoc NameLoc,
                                       OperandVector &Operands) {
  // (Mnemonic alias handling elided in this listing.)
  Name = parseMnemonicSuffix(Name);

  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.starts_with("image_");

  OperandMode Mode = OperandMode_Default;
  if (IsMIMG && isGFX10Plus() && Operands.size() == 1)
    Mode = OperandMode_NSA;

  // (Operand parsing loop elided in this listing.)
  checkUnsupportedInstruction(Name, NameLoc);
  if (!Parser.hasPendingError()) {
    StringRef Msg = "not a valid operand.";
    Error(getLoc(), Msg);
  }
ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
                                          OperandVector &Operands) {
  SMLoc S = getLoc();
  if (!trySkipId(Name))
    return ParseStatus::NoMatch;

  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
  return ParseStatus::Success;
}

// An overload parseIntWithPrefix(const char *Prefix, int64_t &IntVal) handles
// the bare "prefix:value" form; its body is elided in this listing.

ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
    const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
    std::function<bool(int64_t &)> ConvertResult) {
  SMLoc S = getLoc();
  int64_t Value = 0;

  ParseStatus Res = parseIntWithPrefix(Prefix, Value);
  if (!Res.isSuccess())
    return Res;

  if (ConvertResult && !ConvertResult(Value)) {
    Error(S, "invalid " + StringRef(Prefix) + " value.");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
    const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
    bool (*ConvertResult)(int64_t &)) {
  SMLoc S = getLoc();
  // ("prefix:[v0,v1,...]" form; opening-bracket handling elided.)
  int64_t Val = 0;
  const unsigned MaxSize = 4;

  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Op))
      return ParseStatus::Failure;

    if (Op != 0 && Op != 1)
      return Error(Loc, "invalid " + StringRef(Prefix) + " value.");

    Val |= (Op << I);

    if (trySkipToken(AsmToken::RBrac))
      break;

    if (I + 1 == MaxSize)
      return Error(getLoc(), "expected a closing square bracket");

    if (!skipToken(AsmToken::Comma, "expected a comma"))
      return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return ParseStatus::Success;
}
ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
                                           OperandVector &Operands,
                                           AMDGPUOperand::ImmTy ImmTy) {
  int64_t Bit;
  SMLoc S = getLoc();

  if (trySkipId(Name)) {
    Bit = 1;
  } else if (trySkipId("no", Name)) {
    Bit = 0;
  } else {
    return ParseStatus::NoMatch;
  }

  if (Name == "r128" && !hasMIMG_R128())
    return Error(S, "r128 modifier is not supported on this GPU");
  if (Name == "a16" && !hasA16())
    return Error(S, "a16 modifier is not supported on this GPU");

  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return ParseStatus::Success;
}
unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
                                      bool &Disabling) const {
  Disabling = Id.consume_front("no");

  if (isGFX12Plus())
    return StringSwitch<unsigned>(Id)
        // (gfx12+ cache-policy aliases elided in this listing.)
        .Default(0);

  return StringSwitch<unsigned>(Id)
      // (glc/slc/dlc/scc bits elided in this listing.)
      .Default(0);
}

ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
  if (isGFX12Plus()) {
    SMLoc StringLoc = getLoc();

    int64_t CPolVal = 0;
    int64_t Scope = 0;
    ParseStatus ResTH = ParseStatus::NoMatch;
    ParseStatus ResScope = ParseStatus::NoMatch;
    // (Temporal-hint ("th") parsing elided in this listing.)
    ResScope = parseScope(Operands, Scope);

    // GFX12.5 only: "nv" (non-volatile) bit.
    if (trySkipId("nv")) {
      CPolVal |= CPol::NV;
    } else if (trySkipId("no", "nv")) {
      // explicitly disabled
    }

    // GFX12.5 only: scale_offset.
    if (trySkipId("scale_offset")) {
      CPolVal |= CPol::SCAL;
    } else if (trySkipId("no", "scale_offset")) {
      // explicitly disabled
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
                                                AMDGPUOperand::ImmTyCPol));
    return ParseStatus::Success;
  }

  // Pre-gfx12: individual glc/slc/dlc/scc bits.
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  SMLoc OpLoc = getLoc();
  unsigned Enabled = 0, Seen = 0;
  for (;;) {
    SMLoc S = getLoc();
    bool Disabling;
    unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
    if (!CPol)
      break;

    lex();

    if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
      return Error(S, "dlc modifier is not supported on this GPU");

    if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
      return Error(S, "scc modifier is not supported on this GPU");

    if (Seen & CPol)
      return Error(S, "duplicate cache policy modifier");

    if (!Disabling)
      Enabled |= CPol;

    Seen |= CPol;
  }

  if (!Seen)
    return ParseStatus::NoMatch;

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
  return ParseStatus::Success;
}
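// For reference: on pre-gfx12 targets the cache-policy operand parsed above is
// written as individual bits, e.g. "glc slc dlc" (or "scc" on gfx90a), while
// gfx12+ uses the "th:..." / "scope:..." modifiers handled below. Illustrative
// only:
//   buffer_load_b32 v0, v1, s[0:3], 0 offen glc slc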
ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
                                        int64_t &Scope) {
  ParseStatus Res = parseStringOrIntWithPrefix(
      Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
      Scope);
  // (Mapping of the parsed index onto CPol scope bits elided in this listing.)
  return Res;
}

ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
  StringRef Value;
  SMLoc StringLoc;
  ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
  if (!Res.isSuccess())
    return Res;

  if (Value == "TH_DEFAULT")
    TH = AMDGPU::CPol::TH_RT;
  else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
           Value == "TH_LOAD_NT_WB") {
    return Error(StringLoc, "invalid th value");
  } else if (Value.consume_front("TH_ATOMIC_")) {
    TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
  } else if (Value.consume_front("TH_LOAD_")) {
    TH = AMDGPU::CPol::TH_TYPE_LOAD;
  } else if (Value.consume_front("TH_STORE_")) {
    TH = AMDGPU::CPol::TH_TYPE_STORE;
  } else {
    return Error(StringLoc, "invalid th value");
  }

  if (Value == "BYPASS")
    TH |= AMDGPU::CPol::TH_REAL_BYPASS;

  if (TH != 0) {
    if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
      TH |= StringSwitch<int64_t>(Value)
                // (atomic temporal-hint values elided in this listing)
                .Default(0xffffffff);
    else
      TH |= StringSwitch<int64_t>(Value)
                // (load/store temporal-hint values elided in this listing)
                .Default(0xffffffff);
  }

  if (TH == 0xffffffff)
    return Error(StringLoc, "invalid th value");

  return ParseStatus::Success;
}
static void addOptionalImmOperand(
    MCInst &Inst, const OperandVector &Operands,
    AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
    AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
    std::optional<unsigned> InsertAt = std::nullopt) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    const AMDGPUOperand &Op =
        static_cast<const AMDGPUOperand &>(*Operands[Idx]);
    if (InsertAt.has_value())
      Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
    else
      Op.addImmOperands(Inst, 1);
  } else {
    if (InsertAt.has_value())
      Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
    else
      Inst.addOperand(MCOperand::createImm(Default));
  }
}
ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
                                                   StringRef &Value,
                                                   SMLoc &StringLoc) {
  if (!trySkipId(Prefix, AsmToken::Colon))
    return ParseStatus::NoMatch;
  StringLoc = getLoc();
  return parseId(Value, "expected an identifier") ? ParseStatus::Success
                                                  : ParseStatus::Failure;
}

ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
    OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
    int64_t &IntVal) {
  if (!trySkipId(Name, AsmToken::Colon))
    return ParseStatus::NoMatch;

  SMLoc StringLoc = getLoc();
  if (isToken(AsmToken::Identifier)) {
    StringRef Value = getTokenStr();
    lex();
    for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
      if (Value == Ids[IntVal])
        break;
  } else if (!parseExpr(IntVal)) {
    return ParseStatus::Failure;
  }

  if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
    return Error(StringLoc, "invalid " + Twine(Name) + " value");

  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
    OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
    AMDGPUOperand::ImmTy Type) {
  SMLoc S = getLoc();
  int64_t IntVal;

  ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
  if (Res.isSuccess())
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));

  return Res;
}
bool AMDGPUAsmParser::tryParseFmt(const char *Pref, int64_t MaxVal,
                                  int64_t &Fmt) {
  int64_t Val;
  SMLoc Loc = getLoc();

  auto Res = parseIntWithPrefix(Pref, Val);
  if (Res.isFailure())
    return false;
  if (Res.isNoMatch())
    return true;

  if (Val < 0 || Val > MaxVal) {
    Error(Loc, Twine("out of range ", StringRef(Pref)));
    return false;
  }

  Fmt = Val;
  return true;
}

ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
                                              AMDGPUOperand::ImmTy ImmTy) {
  const char *Pref = "index_key";
  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();
  auto Res = parseIntWithPrefix(Pref, ImmVal);
  if (!Res.isSuccess())
    return Res;

  if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
       ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
      (ImmVal < 0 || ImmVal > 1))
    return Error(Loc, Twine("out of range ", StringRef(Pref)));

  if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
    return Error(Loc, Twine("out of range ", StringRef(Pref)));

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
}

ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
}

ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
}
ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
                                               StringRef Name,
                                               AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(Operands, Name,
                                    {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
                                     "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
                                     "MATRIX_FMT_FP4"},
                                    Type);
}

ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
  return tryParseMatrixFMT(Operands, "matrix_a_fmt",
                           AMDGPUOperand::ImmTyMatrixAFMT);
}

ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
  return tryParseMatrixFMT(Operands, "matrix_b_fmt",
                           AMDGPUOperand::ImmTyMatrixBFMT);
}

ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
                                                 StringRef Name,
                                                 AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(
      Operands, Name, {"MATRIX_SCALE_ROW0", "MATRIX_SCALE_ROW1"}, Type);
}

ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
  return tryParseMatrixScale(Operands, "matrix_a_scale",
                             AMDGPUOperand::ImmTyMatrixAScale);
}

ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
  return tryParseMatrixScale(Operands, "matrix_b_scale",
                             AMDGPUOperand::ImmTyMatrixBScale);
}

ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
                                                    StringRef Name,
                                                    AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(
      Operands, Name,
      {"MATRIX_SCALE_FMT_E8", "MATRIX_SCALE_FMT_E5M3", "MATRIX_SCALE_FMT_E4M3"},
      Type);
}

ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
  return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
                                AMDGPUOperand::ImmTyMatrixAScaleFmt);
}

ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
  return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
                                AMDGPUOperand::ImmTyMatrixBScaleFmt);
}
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      return ParseStatus::Failure;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
      return ParseStatus::Failure;

    // Skip optional comma between dfmt/nfmt but guard against 2 commas
    // following each other.
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(AsmToken::Comma)) {
      break;
    }
    trySkipToken(AsmToken::Comma);
  }

  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return ParseStatus::NoMatch;

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Fmt = UFMT_UNDEF;

  if (!tryParseFmt("format", UFMT_MAX, Fmt))
    return ParseStatus::Failure;

  if (Fmt == UFMT_UNDEF)
    return ParseStatus::NoMatch;

  Format = Fmt;
  return ParseStatus::Success;
}

bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt,
                                    StringRef FormatStr, SMLoc Loc) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  int64_t Format;

  Format = getDfmt(FormatStr);
  if (Format != DFMT_UNDEF) {
    Dfmt = Format;
    return true;
  }

  Format = getNfmt(FormatStr, getSTI());
  if (Format != NFMT_UNDEF) {
    Nfmt = Format;
    return true;
  }

  Error(Loc, "unsupported format");
  return false;
}

ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
                                                      SMLoc FormatLoc,
                                                      int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    return ParseStatus::Failure;

  if (trySkipToken(AsmToken::Comma)) {
    StringRef Str;
    SMLoc Loc = getLoc();
    if (!parseId(Str, "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
      return ParseStatus::Failure;
    if (Dfmt == DFMT_UNDEF)
      return Error(Loc, "duplicate numeric format");
    if (Nfmt == NFMT_UNDEF)
      return Error(Loc, "duplicate data format");
  }

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  if (isGFX10Plus()) {
    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
    if (Ufmt == UFMT_UNDEF)
      return Error(FormatLoc, "unsupported format");
    Format = Ufmt;
  } else {
    Format = encodeDfmtNfmt(Dfmt, Nfmt);
  }

  return ParseStatus::Success;
}
ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
                                                        SMLoc Loc,
                                                        int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  auto Id = getUnifiedFormat(FormatStr, getSTI());
  if (Id == UFMT_UNDEF)
    return ParseStatus::NoMatch;

  if (!isGFX10Plus())
    return Error(Loc, "unified format is not supported on this GPU");

  Format = Id;
  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  SMLoc Loc = getLoc();

  if (!parseExpr(Format))
    return ParseStatus::Failure;
  if (!isValidFormatEncoding(Format, getSTI()))
    return Error(Loc, "out of range format");

  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  if (!trySkipId("format", AsmToken::Colon))
    return ParseStatus::NoMatch;

  if (trySkipToken(AsmToken::LBrac)) {
    StringRef FormatStr;
    SMLoc Loc = getLoc();
    if (!parseId(FormatStr, "expected a format string"))
      return ParseStatus::Failure;

    auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
    if (Res.isNoMatch())
      Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
    if (!Res.isSuccess())
      return Res;

    if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
      return ParseStatus::Failure;

    return ParseStatus::Success;
  }

  return parseNumericFormat(Format);
}

ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Format = getDefaultFormatEncoding(getSTI());
  ParseStatus Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res.isFailure())
    return Res;

  bool FormatFound = Res.isSuccess();

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(AsmToken::Comma);

  if (isToken(AsmToken::EndOfStatement))
    return ParseStatus::Success;

  // Parse soffset, then an optional trailing symbolic/numeric format.
  Res = parseRegOrImm(Operands);
  if (!Res.isSuccess())
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res.isFailure())
      return Res;
    if (Res.isSuccess()) {
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return ParseStatus::Success;
  }

  if (isId("format") && peekToken().is(AsmToken::Colon))
    return Error(getLoc(), "duplicate format");
  return ParseStatus::Success;
}
ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
  ParseStatus Res =
      parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
  if (Res.isNoMatch()) {
    Res = parseIntWithPrefix("inst_offset", Operands,
                             AMDGPUOperand::ImmTyInstOffset);
  }
  return Res;
}

ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
  ParseStatus Res =
      parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
  if (Res.isNoMatch())
    Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
  return Res;
}

ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
  ParseStatus Res =
      parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
  if (Res.isNoMatch()) {
    Res =
        parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
  }
  return Res;
}
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments.
    if (Op.isReg()) {
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    if (Op.isOff()) {
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(MCRegister()));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
      continue;

    // Handle optional arguments.
    OptionalIdx[Op.getImmTy()] = i;
  }

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    Compr = true;
    // (Operand reshuffling for compressed exports elided in this listing.)
  }

  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg()) {
      EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
    }
  }

  Inst.addOperand(MCOperand::createImm(EnMask));
}

static bool
encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal,
          bool Saturate,
          unsigned (*encode)(const AMDGPU::IsaVersion &Version, unsigned, unsigned),
          unsigned (*decode)(const AMDGPU::IsaVersion &Version, unsigned)) {
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.ends_with("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}

ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = getLoc();

  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
    while (!isToken(AsmToken::EndOfStatement)) {
      if (!parseCnt(Waitcnt))
        return ParseStatus::Failure;
    }
  } else {
    if (!parseExpr(Waitcnt))
      return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return ParseStatus::Success;
}
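// For reference, parseSWaitCnt above accepts either a raw immediate or named
// counters, e.g. (illustrative only):
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
//   s_waitcnt 0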
bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
  SMLoc FieldLoc = getLoc();
  StringRef FieldName = getTokenStr();
  if (!skipToken(AsmToken::Identifier, "expected a field name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  SMLoc ValueLoc = getLoc();
  StringRef ValueName = getTokenStr();
  if (!skipToken(AsmToken::Identifier, "expected a value name") ||
      !skipToken(AsmToken::RParen, "expected a right parenthesis"))
    return false;

  unsigned Shift;
  if (FieldName == "instid0") {
    Shift = 0;
  } else if (FieldName == "instskip") {
    Shift = 4;
  } else if (FieldName == "instid1") {
    Shift = 7;
  } else {
    Error(FieldLoc, "invalid field name " + FieldName);
    return false;
  }

  int64_t Value;
  if (Shift == 4) {
    // Parse values for instskip.
    Value = StringSwitch<int64_t>(ValueName)
                // (SAME/NEXT/SKIP_* values elided in this listing)
                .Default(-1);
  } else {
    // Parse values for instid0 and instid1.
    Value = StringSwitch<int64_t>(ValueName)
                .Case("NO_DEP", 0)
                .Case("VALU_DEP_1", 1)
                .Case("VALU_DEP_2", 2)
                .Case("VALU_DEP_3", 3)
                .Case("VALU_DEP_4", 4)
                .Case("TRANS32_DEP_1", 5)
                .Case("TRANS32_DEP_2", 6)
                .Case("TRANS32_DEP_3", 7)
                .Case("FMA_ACCUM_CYCLE_1", 8)
                .Case("SALU_CYCLE_1", 9)
                .Case("SALU_CYCLE_2", 10)
                .Case("SALU_CYCLE_3", 11)
                .Default(-1);
  }
  if (Value < 0) {
    Error(ValueLoc, "invalid value name " + ValueName);
    return false;
  }

  Delay |= Value << Shift;
  return true;
}

ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
  int64_t Delay = 0;
  SMLoc S = getLoc();

  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
    do {
      if (!parseDelay(Delay))
        return ParseStatus::Failure;
    } while (trySkipToken(AsmToken::Pipe));
  } else {
    if (!parseExpr(Delay))
      return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
  return ParseStatus::Success;
}
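// For reference, parseDelay above accepts pipe-separated fields, e.g.
// (illustrative only):
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)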
bool AMDGPUOperand::isSWaitCnt() const { return isImm(); }

bool AMDGPUOperand::isSDelayALU() const { return isImm(); }

void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
                                  StringRef DepCtrName) {
  switch (ErrorId) {
  case OPR_ID_UNKNOWN:
    Error(Loc, Twine("invalid counter name ", DepCtrName));
    return;
  case OPR_ID_UNSUPPORTED:
    Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
    return;
  case OPR_ID_DUPLICATE:
    Error(Loc, Twine("duplicate counter name ", DepCtrName));
    return;
  case OPR_VAL_INVALID:
    Error(Loc, Twine("invalid value for ", DepCtrName));
    return;
  default:
    assert(false);
  }
}

bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
  using namespace llvm::AMDGPU::DepCtr;

  SMLoc DepCtrLoc = getLoc();
  StringRef DepCtrName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t ExprVal;
  if (!parseExpr(ExprVal))
    return false;

  unsigned PrevOprMask = UsedOprMask;
  int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());

  if (CntVal < 0) {
    depCtrError(DepCtrLoc, CntVal, DepCtrName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  unsigned CntValMask = PrevOprMask ^ UsedOprMask;
  DepCtr = (DepCtr & ~CntValMask) | CntVal;
  return true;
}

ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
  using namespace llvm::AMDGPU::DepCtr;

  int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
  SMLoc Loc = getLoc();

  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
    unsigned UsedOprMask = 0;
    while (!isToken(AsmToken::EndOfStatement)) {
      if (!parseDepCtr(DepCtr, UsedOprMask))
        return ParseStatus::Failure;
    }
  } else {
    if (!parseExpr(DepCtr))
      return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
  return ParseStatus::Success;
}

bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
                                            OperandInfoTy &Offset,
                                            OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (!trySkipId("hwreg", AsmToken::LParen))
    return ParseStatus::NoMatch;

  // The register may be specified by name or using a numeric code.
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Val, "a register name")) {
    return ParseStatus::Failure;
  }

  if (trySkipToken(AsmToken::RParen))
    return ParseStatus::Success;

  // Parse optional params.
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return ParseStatus::Failure;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Val))
    return ParseStatus::Failure;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return ParseStatus::Failure;

  Width.Loc = getLoc();
  if (!parseExpr(Width.Val) ||
      !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return ParseStatus::Failure;

  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  StructuredOpField HwReg("id", "hardware register", HwregId::Width,
                          HwregId::Default);
  StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
                           HwregOffset::Default);
  struct : StructuredOpField {
    using StructuredOpField::StructuredOpField;
    bool validate(AMDGPUAsmParser &Parser) const override {
      if (!isUIntN(Width, Val - 1))
        return Error(Parser, "only values from 1 to 32 are legal");
      return true;
    }
  } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
  ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});

  if (Res.isNoMatch())
    Res = parseHwregFunc(HwReg, Offset, Width);

  if (Res.isSuccess()) {
    if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
      return ParseStatus::Failure;
    ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
  }

  if (Res.isNoMatch() &&
      parseExpr(ImmVal, "a hwreg macro, structured immediate"))
    Res = ParseStatus::Success;

  if (!Res.isSuccess())
    return ParseStatus::Failure;

  if (!isUInt<16>(ImmVal))
    return Error(Loc, "invalid immediate: only 16-bit values are legal");
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
  return ParseStatus::Success;
}
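// For reference, parseHwreg above accepts the hwreg macro or a raw immediate,
// e.g. (illustrative only):
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)
//   s_getreg_b32 s0, hwreg(6)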
bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}

bool AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op,
                                       OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  Msg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Val, "a message name")) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    Op.Loc = getLoc();
    if (isToken(AsmToken::Identifier) &&
        (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
            OPR_ID_UNKNOWN) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Val, "an operation name")) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Val))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                      const OperandInfoTy &Op,
                                      const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether the message is specified in a
  // symbolic or in a numeric form. In the latter case only encodability is
  // checked.
  bool Strict = Msg.IsSymbolic;

  if (Strict) {
    if (Msg.Val == OPR_ID_UNSUPPORTED) {
      Error(Msg.Loc, "specified message id is not supported on this GPU");
      return false;
    }
  } else {
    if (!isValidMsgId(Msg.Val, getSTI())) {
      Error(Msg.Loc, "invalid message id");
      return false;
    }
  }
  if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
    if (Op.IsDefined)
      Error(Op.Loc, "message does not support operations");
    else
      Error(Msg.Loc, "missing message operation");
    return false;
  }
  if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
    if (Op.Val == OPR_ID_UNSUPPORTED)
      Error(Op.Loc, "specified operation id is not supported on this GPU");
    else
      Error(Op.Loc, "invalid operation id");
    return false;
  }
  if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
      Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}

ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(OPR_ID_UNKNOWN);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
    } else {
      return ParseStatus::Failure;
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    if (!isUInt<16>(ImmVal))
      return Error(Loc, "invalid immediate: only 16-bit values are legal");
  } else {
    return ParseStatus::Failure;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return ParseStatus::Success;
}
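// For reference, parseSendMsg above accepts the sendmsg macro or a raw
// immediate, e.g. (illustrative only):
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg 0x22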
bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}

ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return ParseStatus::NoMatch;

  int Slot = StringSwitch<int>(Str)
                 // ("p10"/"p20"/"p0" cases elided in this listing)
                 .Default(-1);

  if (Slot == -1)
    return Error(S, "invalid interpolation slot");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return ParseStatus::NoMatch;

  if (!Str.starts_with("attr"))
    return Error(S, "invalid interpolation attribute");

  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
                     // (".x"/".y"/".z"/".w" cases elided in this listing)
                     .Default(-1);
  if (AttrChan == -1)
    return Error(S, "invalid or missing interpolation attribute channel");

  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return Error(S, "invalid or missing interpolation attribute number");

  if (Attr > 32)
    return Error(S, "out of bounds interpolation attribute number");

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(
      this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Exp;

  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return ParseStatus::NoMatch;

  unsigned Id = getTgtId(Str);
  if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
    return Error(S, (Id == ET_INVALID)
                        ? "invalid exp target"
                        : "exp target is not supported on this GPU");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return ParseStatus::Success;
}
bool AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
}

bool AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}

bool AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}

StringRef AMDGPUAsmParser::getId() const {
  return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
}

bool AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (isId(Id)) {
    lex();
    return true;
  }
  return false;
}

bool AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
  if (isToken(AsmToken::Identifier)) {
    StringRef Tok = getTokenStr();
    if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
      lex();
      return true;
    }
  }
  return false;
}

bool AMDGPUAsmParser::trySkipId(const StringRef Id,
                                const AsmToken::TokenKind Kind) {
  if (isId(Id) && peekToken().is(Kind)) {
    lex();
    lex();
    return true;
  }
  return false;
}

bool AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (isToken(Kind)) {
    lex();
    return true;
  }
  return false;
}

bool AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                                const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);
    return false;
  }
  return true;
}

bool AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
  SMLoc S = getLoc();

  const MCExpr *Expr;
  if (Parser.parseExpression(Expr))
    return false;

  if (Expr->evaluateAsAbsolute(Imm))
    return true;

  if (Expected.empty()) {
    Error(S, "expected absolute expression");
  } else {
    Error(S, Twine("expected ", Expected) +
                 Twine(" or an absolute expression"));
  }
  return false;
}

bool AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
  SMLoc S = getLoc();

  const MCExpr *Expr;
  if (Parser.parseExpression(Expr))
    return false;

  int64_t IntVal;
  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
  } else {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  }
  return true;
}

bool AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::String)) {
    Val = getToken().getStringContents();
    lex();
    return true;
  }
  Error(getLoc(), ErrMsg);
  return false;
}

bool AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::Identifier)) {
    Val = getTokenStr();
    lex();
    return true;
  }
  if (!ErrMsg.empty())
    Error(getLoc(), ErrMsg);
  return false;
}

AsmToken AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}

AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
  return isToken(AsmToken::EndOfStatement)
             ? getToken()
             : getLexer().peekTok(ShouldSkipSpace);
}

void AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
  auto TokCount = getLexer().peekTokens(Tokens);

  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
    Tokens[Idx] = AsmToken(AsmToken::Error, "");
}

AsmToken::TokenKind AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}

SMLoc AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}

StringRef AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}

void AMDGPUAsmParser::lex() {
  Parser.Lex();
}

SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
  return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
}
SMLoc AMDGPUAsmParser::getOperandLoc(
    std::function<bool(const AMDGPUOperand &)> Test,
    const OperandVector &Operands) const {
  for (unsigned i = Operands.size() - 1; i > 0; --i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Test(Op))
      return Op.getStartLoc();
  }
  return getInstLoc(Operands);
}

SMLoc AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
                                 const OperandVector &Operands) const {
  auto Test = [=](const AMDGPUOperand &Op) { return Op.isImmTy(Type); };
  return getOperandLoc(Test, Operands);
}

SMLoc AMDGPUAsmParser::getRegLoc(MCRegister Reg,
                                 const OperandVector &Operands) const {
  auto Test = [=](const AMDGPUOperand &Op) {
    return Op.isRegKind() && Op.getReg() == Reg;
  };
  return getOperandLoc(Test, Operands);
}

SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
                                 bool SearchMandatoryLiterals) const {
  auto Test = [](const AMDGPUOperand &Op) {
    return Op.IsImmKindLiteral() || Op.isExpr();
  };
  SMLoc Loc = getOperandLoc(Test, Operands);
  if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
    Loc = getMandatoryLitLoc(Operands);
  return Loc;
}

SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
  auto Test = [](const AMDGPUOperand &Op) {
    return Op.IsImmKindMandatoryLiteral();
  };
  return getOperandLoc(Test, Operands);
}

SMLoc AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
  auto Test = [](const AMDGPUOperand &Op) {
    return Op.isImmKindConst();
  };
  return getOperandLoc(Test, Operands);
}

ParseStatus
AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
  if (!trySkipToken(AsmToken::LCurly))
    return ParseStatus::NoMatch;

  bool First = true;
  while (!trySkipToken(AsmToken::RCurly)) {
    if (!First &&
        !skipToken(AsmToken::Comma, "comma or closing brace expected"))
      return ParseStatus::Failure;

    StringRef Id = getTokenStr();
    SMLoc IdLoc = getLoc();
    if (!skipToken(AsmToken::Identifier, "field name expected"))
      return ParseStatus::Failure;

    auto I =
        find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
    if (I == Fields.end())
      return Error(IdLoc, "unknown field");
    if ((*I)->IsDefined)
      return Error(IdLoc, "duplicate field");

    (*I)->Loc = getLoc();
    if (!skipToken(AsmToken::Colon, "colon expected") || !parseExpr((*I)->Val))
      return ParseStatus::Failure;
    (*I)->IsDefined = true;

    First = false;
  }
  return ParseStatus::Success;
}

bool AMDGPUAsmParser::validateStructuredOpFields(
    ArrayRef<const StructuredOpField *> Fields) {
  return all_of(Fields, [this](const StructuredOpField *F) {
    return F->validate(*this);
  });
}
static unsigned encodeBitmaskPerm(const unsigned AndMask,
                                  const unsigned OrMask,
                                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC | (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) | (XorMask << BITMASK_XOR_SHIFT);
}

bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
                                          const unsigned MaxVal,
                                          const Twine &ErrMsg, SMLoc &Loc) {
  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return false;
  Loc = getLoc();
  if (!parseExpr(Op))
    return false;
  if (Op < MinVal || Op > MaxVal) {
    Error(Loc, ErrMsg);
    return false;
  }
  return true;
}

bool AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t *Op,
                                           const unsigned MinVal,
                                           const unsigned MaxVal,
                                           const StringRef ErrMsg) {
  SMLoc Loc;
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
      return false;
  }
  return true;
}

bool AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    for (unsigned I = 0; I < LANE_NUM; ++I)
      Imm |= Lane[I] << (LANE_SHIFT * I);
    return true;
  }
  return false;
}

bool AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperand(GroupSize, 2, 32,
                           "group size must be in the interval [2,32]", Loc))
    return false;
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperand(LaneIdx, 0, GroupSize - 1,
                          "lane id must be in the interval [0,group size - 1]",
                          Loc)) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}

bool AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize, 2, 32,
                           "group size must be in the interval [2,32]", Loc))
    return false;
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}

bool AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize, 1, 16,
                           "group size must be in the interval [1,16]", Loc))
    return false;
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}

bool AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return false;

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl))
    return false;
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch (Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!AMDGPU::isGFX9Plus(getSTI())) {
    Error(getLoc(), "FFT mode swizzle not supported on this GPU");
    return false;
  }

  int64_t Swizzle;
  SMLoc Loc;
  if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
                           "FFT swizzle must be in the interval [0," +
                               Twine(FFT_SWIZZLE_MAX) + Twine(']'),
                           Loc))
    return false;

  Imm = FFT_MODE_ENC | Swizzle;
  return true;
}

bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!AMDGPU::isGFX9Plus(getSTI())) {
    Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
    return false;
  }

  SMLoc Loc;
  int64_t Direction;

  if (!parseSwizzleOperand(Direction, 0, 1,
                           "direction must be 0 (left) or 1 (right)", Loc))
    return false;

  int64_t RotateSize;
  if (!parseSwizzleOperand(
          RotateSize, 0, ROTATE_MAX_SIZE,
          "number of threads to rotate must be in the interval [0," +
              Twine(ROTATE_MAX_SIZE) + Twine(']'),
          Loc))
    return false;

  Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
        (RotateSize << ROTATE_SIZE_SHIFT);
  return true;
}

bool AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
  SMLoc OffsetLoc = getLoc();

  if (!parseExpr(Imm, "a swizzle macro"))
    return false;
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

bool AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else if (trySkipId(IdSymbolic[ID_FFT])) {
      Ok = parseSwizzleFFT(Imm);
    } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
      Ok = parseSwizzleRotate(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}

ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {
    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle"))
        Ok = parseSwizzleMacro(Imm);
      else
        Ok = parseSwizzleOffset(Imm);
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S,
                                                AMDGPUOperand::ImmTySwizzle));

    return Ok ? ParseStatus::Success : ParseStatus::Failure;
  }
  return ParseStatus::NoMatch;
}

bool AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}
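// For reference, parseSwizzle above accepts either a 16-bit offset or one of
// the swizzle macros, e.g. (illustrative only):
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 16)
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:0x8055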
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0) ?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}

ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;
  SMLoc S = getLoc();

  if (trySkipId("gpr_idx", AsmToken::LParen)) {
    Imm = parseGPRIdxMacro();
    if (Imm == UNDEF)
      return ParseStatus::Failure;
  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return ParseStatus::Failure;
    if (Imm < 0 || !isUInt<4>(Imm))
      return Error(S, "invalid immediate: only 4-bit values are legal");
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return ParseStatus::Success;
}
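// For reference, parseGPRIdxMode above accepts either a 4-bit immediate or the
// gpr_idx macro, e.g. (illustrative only):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)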
bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}

ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
  // Make sure we are not parsing something that looks like a label or an
  // expression but is not. This will improve error messages.
  if (isRegister() || isModifier())
    return ParseStatus::NoMatch;

  if (!parseExpr(Operands))
    return ParseStatus::Failure;

  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  return ParseStatus::Success;
}

void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                                   bool IsAtomic) {
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  // (Atomic-return detection elided in this listing.)

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments.
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate.
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Tokens like 'offen' are hard-coded into the asm string; there are no
    // MCInst operands for these.
    if (Op.isToken())
      continue;

    // Handle optional arguments.
    OptionalIdx[Op.getImmTy()] = i;
  }
}

bool AMDGPUOperand::isSMRDOffset8() const {
  return isImmLiteral() && isUInt<8>(getImm());
}

bool AMDGPUOperand::isSMEMOffset() const {
  // Offset range is checked later by the validator.
  return isImmLiteral();
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8 bits.
  return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}
bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0 || BoundCtrl == 1) {
    if (!isGFX11Plus())
      BoundCtrl = 1;
    return true;
  }
  return false;
}

void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(),
                                           getSTI().getFeatureString());

  if (isHsaAbi(getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}

/// Parse AMDGPU-specific expressions such as max(...) and or(...).
bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
  using AGVK = AMDGPUMCExpr::VariantKind;

  if (isToken(AsmToken::Identifier)) {
    StringRef TokenId = getTokenStr();
    AGVK VK = StringSwitch<AGVK>(TokenId)
                  .Case("max", AGVK::AGVK_Max)
                  .Case("or", AGVK::AGVK_Or)
                  .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
                  .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
                  .Case("alignto", AGVK::AGVK_AlignTo)
                  .Case("occupancy", AGVK::AGVK_Occupancy)
                  .Default(AGVK::AGVK_None);

    if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
      SmallVector<const MCExpr *, 4> Exprs;
      uint64_t CommaCount = 0;
      bool LastTokenWasComma = false;
      lex(); // Eat 'max', 'or', etc.
      lex(); // Eat '('
      while (true) {
        if (trySkipToken(AsmToken::RParen)) {
          if (Exprs.empty()) {
            Error(getToken().getLoc(),
                  "empty " + Twine(TokenId) + " expression");
            return true;
          }
          if (CommaCount + 1 != Exprs.size()) {
            Error(getToken().getLoc(),
                  "mismatch of commas in " + Twine(TokenId) + " expression");
            return true;
          }
          Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
          return false;
        }
        const MCExpr *Expr;
        if (getParser().parseExpression(Expr, EndLoc))
          return true;
        Exprs.push_back(Expr);
        LastTokenWasComma = trySkipToken(AsmToken::Comma);
        if (LastTokenWasComma)
          CommaCount++;
        if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
          Error(getToken().getLoc(),
                "unexpected token in " + Twine(TokenId) + " expression");
          return true;
        }
      }
    }
  }
  return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
}
ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
  StringRef Name = getTokenStr();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }

  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }

  return ParseStatus::NoMatch;
}

// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
// the number of src operands, then copies that bit into src0_modifiers.
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
  int Opc = Inst.getOpcode();
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx == -1)
    return;

  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  int SrcNum;
  for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
       ++SrcNum)
    ;

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
  if (DstIdx == -1)
    return;

  const MCOperand &DstOp = Inst.getOperand(DstIdx);
  int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
  uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
  if (DstOp.isReg() &&
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
    if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
      ModVal |= SISrcMods::DST_OP_SEL;
  } else {
    if ((OpSel & (1 << SrcNum)) != 0)
      ModVal |= SISrcMods::DST_OP_SEL;
  }
  Inst.getOperand(ModIdx).setImm(ModVal);
}
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
                                   const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);
  cvtVOP3DstOpSelOnly(Inst, *getMRI());
}

void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
                                   OptionalImmIndexMap &OptionalIdx) {
  cvtVOP3P(Inst, Operands, OptionalIdx);
  cvtVOP3DstOpSelOnly(Inst, *getMRI());
}

static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
  return
      // 1. This operand is input modifiers
      Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.operands()[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1,
                                   MCOI::OperandConstraint::TIED_TO) == -1;
}

void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
               Op.isInterpAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyHigh);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOModSI);
}
void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      Op.addRegOrImmOperands(Inst, 1);
    }
  }

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx == -1)
    return;

  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;
    if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
        (OpSel & (1 << 3)) != 0)
      ModVal |= SISrcMods::DST_OP_SEL;

    Inst.getOperand(ModIdx).setImm(ModVal);
  }
}
void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();
  unsigned I = 1;

  const MCInstrDesc &Desc = MII.get(Opc);
  int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
    static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);

    int NumOperands = Inst.getNumOperands();
    // The order of operands in MCInst differs from the order of parsed
    // operands; add dummy cbsz and blgp operands at the corresponding MCInst
    // positions so the scale values can be parsed correctly.
    if (NumOperands == CbszOpIdx) {
      Inst.addOperand(MCOperand::createImm(0));
      Inst.addOperand(MCOperand::createImm(0));
    }

    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      Op.addRegOrImmOperands(Inst, 1);
    }
  }

  // Write back the parsed CBSZ and BLGP values.
  auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
  if (CbszIdx != OptionalIdx.end()) {
    int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
    Inst.getOperand(CbszOpIdx).setImm(CbszVal);
  }

  int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
  if (BlgpIdx != OptionalIdx.end()) {
    int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
    Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
  }

  // Fold op_sel / op_sel_hi into the scale source modifiers.
  unsigned OpSel = 0;
  auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
  if (OpselIdx != OptionalIdx.end()) {
    OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
                .getImm();
  }

  unsigned OpSelHi = 0;
  auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
  if (OpselHiIdx != OptionalIdx.end()) {
    OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
                  .getImm();
  }

  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers};

  for (unsigned J = 0; J < 2; ++J) {
    unsigned ModVal = 0;
    if (OpSel & (1 << J))
      ModVal |= SISrcMods::OP_SEL_0;
    if (OpSelHi & (1 << J))
      ModVal |= SISrcMods::OP_SEL_1;

    const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    Inst.getOperand(ModIdx).setImm(ModVal);
  }
}
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      Op.addRegOrImmOperands(Inst, 1);
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyScaleSel);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyByteSel);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOModSI);

  // Special case v_mac_* and v_fmac_*: src2 is tied to dst, and modifiers are
  // not allowed for it in the assembler, so src2_modifiers must be 0.
  if (isMAC(Opc)) {
    auto *it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();

  if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
      Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
    Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
    Inst.addOperand(Inst.getOperand(0));
  }

  // Adding vdst_in operand is already covered for these DPP instructions in
  // cvtVOP3DPP.
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
      !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
    Inst.addOperand(Inst.getOperand(0));
  }

  int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
  if (BitOp3Idx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          -1);
  }

  int MatrixAFMTIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
  if (MatrixAFMTIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAFMT, 0);
  }

  int MatrixBFMTIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
  if (MatrixBFMTIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBFMT, 0);
  }

  int MatrixAScaleIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
  if (MatrixAScaleIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAScale, 0);
  }

  int MatrixBScaleIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
  if (MatrixBScaleIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBScale, 0);
  }

  int MatrixAScaleFmtIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
  if (MatrixAScaleFmtIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
  }

  int MatrixBScaleFmtIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
  if (MatrixBScaleFmtIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAReuse, 0);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBReuse, 0);

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);

  int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
  if (NegHiIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);

  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  if (NegLoIdx != -1)
    NegLo = Inst.getOperand(NegLoIdx).getImm();

  if (NegHiIdx != -1)
    NegHi = Inst.getOperand(NegHiIdx).getImm();

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (ModIdx == -1)
      continue;

    uint32_t ModVal = 0;

    const MCOperand &SrcOp = Inst.getOperand(OpIdx);
    if (SrcOp.isReg() && getMRI()
                             ->getRegClass(AMDGPU::VGPR_16RegClassID)
                             .contains(SrcOp.getReg())) {
      if (AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI()))
        ModVal |= SISrcMods::OP_SEL_0;
    } else {
      if ((OpSel & (1 << J)) != 0)
        ModVal |= SISrcMods::OP_SEL_0;
    }

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  cvtVOP3(Inst, Operands, OptIdx);
  cvtVOP3P(Inst, Operands, OptIdx);
}

static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
                                  unsigned i, unsigned Opc,
                                  AMDGPU::OpName OpName) {
  if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
    ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
  else
    ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
}

void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
  unsigned Opc = Inst.getOpcode();

  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
  addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
  addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
  ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2

  OptionalImmIndexMap OptIdx;
  for (unsigned i = 5; i < Operands.size(); ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    OptIdx[Op.getImmTy()] = i;
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey8bit);
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey16bit);
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey32bit);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);

  cvtVOP3P(Inst, Operands, OptIdx);
}

ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
  if (!hasVOPD(getSTI()))
    return ParseStatus::NoMatch;

  if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
    SMLoc S = getLoc();
    lex();
    lex();
    Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
    SMLoc OpYLoc = getLoc();
    StringRef OpYName;
    if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
      Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
      return ParseStatus::Success;
    }
    return Error(OpYLoc, "expected a VOPDY instruction after ::");
  }
  return ParseStatus::NoMatch;
}

// Create VOPD MCInst operands using parsed assembler operands.
void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
  auto addOp = [&](uint16_t ParsedOprIdx) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
    if (Op.isRegOrImmWithInputMods()) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      return;
    }
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      return;
    }
    if (Op.isImm()) {
      Op.addImmOperands(Inst, 1);
      return;
    }
    llvm_unreachable("Unhandled operand type in cvtVOPD");
  };

  const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);

  // MCInst operands are ordered as follows:
  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
  for (auto CompIdx : VOPD::COMPONENTS) {
    addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
  }

  for (auto CompIdx : VOPD::COMPONENTS) {
    const auto &CInfo = InstInfo[CompIdx];
    auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
    for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
      addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
    if (CInfo.hasSrc2Acc())
      addOp(CInfo.getIndexOfDstInParsedOperands());
  }

  int BitOp3Idx =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
  if (BitOp3Idx != -1) {
    OptionalImmIndexMap OptIdx;
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
    if (Op.isImm())
      OptIdx[Op.getImmTy()] = Operands.size() - 1;

    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
  }
}
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImmLiteral() && isUInt<16>(getImm());
}
bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc., but the initial dimId will be parsed as an
  // integer (e.g. "1"), followed by a suffix identifier ("D"), so glue the two
  // back together here.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  StringRef DimId = Token;
  if (DimId.starts_with("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}

ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return ParseStatus::NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return ParseStatus::NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding))
    return Error(Loc, "invalid dim value");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return ParseStatus::NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return ParseStatus::Failure;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return ParseStatus::Failure;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return ParseStatus::Failure;
    if (0 > Sels[i] || 7 < Sels[i])
      return Error(Loc, "expected a 3-bit value");
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return ParseStatus::Failure;

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return ParseStatus::Success;
}
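// For reference, parseDPP8 above accepts eight 3-bit lane selects, e.g.
// (illustrative only):
//   v_mov_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0]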
bool AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                         const OperandVector &Operands) {
  if (Ctrl == "row_newbcast")
    return isGFX90A();

  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}

int64_t AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}

int64_t AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
      .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
      .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
      .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
      .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
      .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
      .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
      .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
      .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
      .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
      .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
      .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl + Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}

ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return ParseStatus::NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  if (Val == -1)
    return ParseStatus::Failure;

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return ParseStatus::Success;
}
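// For reference, parseDPPCtrl above accepts the classic DPP controls, e.g.
// (illustrative only):
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
//   v_mov_b32_dpp v0, v1 row_shl:1 bound_ctrl:1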
void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                                 bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // MAC instructions are special because src2 is tied to dst.
  int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
  int Src2ModIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
               Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) != -1;

  unsigned I = 1;
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
  bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;

  for (unsigned E = Operands.size(); I != E; ++I) {
    int NumOperands = Inst.getNumOperands();

    if (IsMAC) {
      if (OldIdx == NumOperands) {
        // Handle the tied old operand.
        constexpr int DST_IDX = 0;
        Inst.addOperand(Inst.getOperand(DST_IDX));
      } else if (Src2ModIdx == NumOperands) {
        Inst.addOperand(MCOperand::createImm(0));
        Inst.addOperand(Inst.getOperand(0));
      }
    }

    if (VdstInIdx == NumOperands)
      Inst.addOperand(Inst.getOperand(0));

    if (IsVOP3CvtSrDpp) {
      // (Placeholder operand insertion for these conversions is elided in
      //  this listing.)
    }

    auto TiedTo =
        Desc.getOperandConstraint(Inst.getNumOperands(), MCOI::TIED_TO);
    if (TiedTo != -1) {
      Inst.addOperand(Inst.getOperand(TiedTo));
    }

    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (IsDPP8 && Op.isDppFI()) {
      // Handled when the fi operand is added below.
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImm() &&
               Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
      assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyByteSel);

  if (Desc.TSFlags & SIInstrFlags::VOP3P)
    cvtVOP3P(Inst, Operands, OptionalIdx);
  else if (Desc.TSFlags & SIInstrFlags::VOP3)
    cvtVOP3OpSel(Inst, Operands, OptionalIdx);

  // (DPP control, row_mask, bank_mask, bound_ctrl and fi operands are added
  //  after this point; elided in this listing.)
}
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands,
                             bool IsDPP8) {
  using namespace llvm::AMDGPU::DPP;

  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo =
        Desc.getOperandConstraint(Inst.getNumOperands(), MCOI::TIED_TO);
    if (TiedTo != -1) {
      // Handle tied old or src2 for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // VOP2b (v_add_u32, v_sub_u32 ...) dpp use the "vcc" token; skip it.
    if (Op.isReg() && validateVccOperand(Op.getReg()))
      continue;

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDppFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments.
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
  }
}
ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
                                          StringRef Prefix,
                                          AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(
      Operands, Prefix,
      {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
      Type);
}

ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  return parseStringOrIntWithPrefix(
      Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
      AMDGPUOperand::ImmTySDWADstUnused);
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc, bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use the "vcc" token as dst; skip
      // it only once. (Remaining skip conditions elided in this listing.)
      SkippedVcc = true;
      continue;
    }

    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  const unsigned Opc = Inst.getOpcode();
  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa has no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClamp, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyOModSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWADstUnused,
                            DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClamp, 0);
      // (omod, dst_sel, dst_unused, src0_sel and src1_sel are added here;
      //  elided in this listing.)
      break;

    case SIInstrFlags::VOPC:
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClamp, 0);
      // (src0_sel and src1_sel are added here; elided in this listing.)
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}: src2 is tied to dst.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto *it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
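// The GET_* blocks above pull the TableGen-generated matcher pieces out of
// AMDGPUGenAsmMatcher.inc: the register-name matcher, the MatchInstructionImpl()
// routine the parser's match-and-emit path calls, and the
// AMDGPUMnemonicSpellCheck / AMDGPUCheckMnemonic helpers referenced elsewhere in
// this file. A hedged usage sketch of the spell checker (FBS is assumed to be
// the available-feature bitset):
//   std::string Suggestion = AMDGPUMnemonicSpellCheck("v_addd_f32", FBS);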
// --- Custom operand dispatch (function head elided in this excerpt; most case
//     labels are elided in the source and follow the generated MCK_<token>
//     naming, as the surviving MCK_row_95_en case shows) ---
  switch (MCK) {
  case MCK_addr64:
    return parseTokenOp("addr64", Operands);
  case MCK_done:
    return parseTokenOp("done", Operands);
  case MCK_idxen:
    return parseTokenOp("idxen", Operands);
  case MCK_lds:
    return parseTokenOp("lds", Operands);
  case MCK_offen:
    return parseTokenOp("offen", Operands);
  case MCK_off:
    return parseTokenOp("off", Operands);
  case MCK_row_95_en:
    return parseTokenOp("row_en", Operands);
  case MCK_gds:
    return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
  case MCK_tfe:
    return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
  }
  return tryCustomParseOperand(Operands, MCK);
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // ...
  AMDGPUOperand &Operand = (AMDGPUOperand &)Op;
  // Several case labels are elided in this excerpt; each pairs a matcher class
  // with the corresponding operand predicate, as the surviving cases show.
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_tfe:
    return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
  // ...
    return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
  // ...
    return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
  case MCK_SOPPBrTarget:
    return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttrChan:
    return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
  // ...
  case MCK_SReg_64_XEXEC:
    // ...
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}
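// Note: this hook is invoked by the generated matcher when an operand fails its
// automatic class check. Tokens such as "gds" or "offen" are parsed up front as
// generic immediate/token operands, so the switch above re-validates them
// against the operand class the matcher expects and reports Match_Success or
// Match_InvalidOperand accordingly.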
// --- Endpgm operand parsing (excerpt; function head and intervening lines elided) ---
  SMLoc S = getLoc();
  // ...
  if (/* ... */)
    return Error(S, "expected a 16-bit value");
  // ...
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  // ...

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
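// A minimal sketch (assuming the usual LLVM target-registration pattern; the
// original registration code is not reproduced in this excerpt) of how this
// parser is registered for both AMDGPU targets:
//
//   extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
//     RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
//     RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
//   }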