1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
56enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
62class AMDGPUOperand : public MCParsedAsmOperand {
63 enum KindTy {
64 Token,
65 Immediate,
66 Register,
67 Expression
68 } Kind;
69
70 SMLoc StartLoc, EndLoc;
71 const AMDGPUAsmParser *AsmParser;
72
73public:
74 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
75 : Kind(Kind_), AsmParser(AsmParser_) {}
76
77 using Ptr = std::unique_ptr<AMDGPUOperand>;
78
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 bool Lit = false;
84 bool Lit64 = false;
85
86 bool hasFPModifiers() const { return Abs || Neg; }
87 bool hasIntModifiers() const { return Sext; }
88 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
89
90 int64_t getFPModifiersOperand() const {
91 int64_t Operand = 0;
92 Operand |= Abs ? SISrcMods::ABS : 0u;
93 Operand |= Neg ? SISrcMods::NEG : 0u;
94 return Operand;
95 }
96
97 int64_t getIntModifiersOperand() const {
98 int64_t Operand = 0;
99 Operand |= Sext ? SISrcMods::SEXT : 0u;
100 return Operand;
101 }
102
103 int64_t getModifiersOperand() const {
104 assert(!(hasFPModifiers() && hasIntModifiers())
105 && "fp and int modifiers should not be used simultaneously");
106 if (hasFPModifiers())
107 return getFPModifiersOperand();
108 if (hasIntModifiers())
109 return getIntModifiersOperand();
110 return 0;
111 }
112
113 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
114 };
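// Illustrative note (not part of the original listing): for a source operand
// written as "-|v0|", the parser sets both Abs and Neg, so
// getModifiersOperand() yields SISrcMods::NEG | SISrcMods::ABS, which is the
// value emitted into the corresponding src*_modifiers operand of the MCInst.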
115
116 enum ImmTy {
117 ImmTyNone,
118 ImmTyGDS,
119 ImmTyLDS,
120 ImmTyOffen,
121 ImmTyIdxen,
122 ImmTyAddr64,
123 ImmTyOffset,
124 ImmTyInstOffset,
125 ImmTyOffset0,
126 ImmTyOffset1,
127 ImmTySMEMOffsetMod,
128 ImmTyCPol,
129 ImmTyTFE,
130 ImmTyD16,
131 ImmTyClamp,
132 ImmTyOModSI,
133 ImmTySDWADstSel,
134 ImmTySDWASrc0Sel,
135 ImmTySDWASrc1Sel,
136 ImmTySDWADstUnused,
137 ImmTyDMask,
138 ImmTyDim,
139 ImmTyUNorm,
140 ImmTyDA,
141 ImmTyR128A16,
142 ImmTyA16,
143 ImmTyLWE,
144 ImmTyExpTgt,
145 ImmTyExpCompr,
146 ImmTyExpVM,
147 ImmTyFORMAT,
148 ImmTyHwreg,
149 ImmTyOff,
150 ImmTySendMsg,
151 ImmTyInterpSlot,
152 ImmTyInterpAttr,
153 ImmTyInterpAttrChan,
154 ImmTyOpSel,
155 ImmTyOpSelHi,
156 ImmTyNegLo,
157 ImmTyNegHi,
158 ImmTyIndexKey8bit,
159 ImmTyIndexKey16bit,
160 ImmTyIndexKey32bit,
161 ImmTyDPP8,
162 ImmTyDppCtrl,
163 ImmTyDppRowMask,
164 ImmTyDppBankMask,
165 ImmTyDppBoundCtrl,
166 ImmTyDppFI,
167 ImmTySwizzle,
168 ImmTyGprIdxMode,
169 ImmTyHigh,
170 ImmTyBLGP,
171 ImmTyCBSZ,
172 ImmTyABID,
173 ImmTyEndpgm,
174 ImmTyWaitVDST,
175 ImmTyWaitEXP,
176 ImmTyWaitVAVDst,
177 ImmTyWaitVMVSrc,
178 ImmTyBitOp3,
179 ImmTyMatrixAFMT,
180 ImmTyMatrixBFMT,
181 ImmTyMatrixAScale,
182 ImmTyMatrixBScale,
183 ImmTyMatrixAScaleFmt,
184 ImmTyMatrixBScaleFmt,
185 ImmTyMatrixAReuse,
186 ImmTyMatrixBReuse,
187 ImmTyScaleSel,
188 ImmTyByteSel,
189 };
190
191 // Immediate operand kind.
192 // It helps to identify the location of an offending operand after an error.
193 // Note that regular literals and mandatory literals (KImm) must be handled
194 // differently. When looking for an offending operand, we should usually
195 // ignore mandatory literals because they are part of the instruction and
196 // cannot be changed. Report location of mandatory operands only for VOPD,
197 // when both OpX and OpY have a KImm and there are no other literals.
198 enum ImmKindTy {
199 ImmKindTyNone,
200 ImmKindTyLiteral,
201 ImmKindTyMandatoryLiteral,
202 ImmKindTyConst,
203 };
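// Illustrative example (assumption, not from the listing): the trailing
// constant of v_fmaak_f32 is a mandatory literal (KImm) and is normally
// skipped when locating an offending operand, whereas an ordinary 32-bit
// literal such as the 1.5 in "v_add_f32 v0, 1.5, v1" (not an inline
// constant) is tracked as ImmKindTyLiteral.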
204
205private:
206 struct TokOp {
207 const char *Data;
208 unsigned Length;
209 };
210
211 struct ImmOp {
212 int64_t Val;
213 ImmTy Type;
214 bool IsFPImm;
215 mutable ImmKindTy Kind;
216 Modifiers Mods;
217 };
218
219 struct RegOp {
220 MCRegister RegNo;
221 Modifiers Mods;
222 };
223
224 union {
225 TokOp Tok;
226 ImmOp Imm;
227 RegOp Reg;
228 const MCExpr *Expr;
229 };
230
231public:
232 bool isToken() const override { return Kind == Token; }
233
234 bool isSymbolRefExpr() const {
235 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
236 }
237
238 bool isImm() const override {
239 return Kind == Immediate;
240 }
241
242 void setImmKindNone() const {
243 assert(isImm());
244 Imm.Kind = ImmKindTyNone;
245 }
246
247 void setImmKindLiteral() const {
248 assert(isImm());
249 Imm.Kind = ImmKindTyLiteral;
250 }
251
252 void setImmKindMandatoryLiteral() const {
253 assert(isImm());
254 Imm.Kind = ImmKindTyMandatoryLiteral;
255 }
256
257 void setImmKindConst() const {
258 assert(isImm());
259 Imm.Kind = ImmKindTyConst;
260 }
261
262 bool IsImmKindLiteral() const {
263 return isImm() && Imm.Kind == ImmKindTyLiteral;
264 }
265
266 bool IsImmKindMandatoryLiteral() const {
267 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
268 }
269
270 bool isImmKindConst() const {
271 return isImm() && Imm.Kind == ImmKindTyConst;
272 }
273
274 bool isInlinableImm(MVT type) const;
275 bool isLiteralImm(MVT type) const;
276
277 bool isRegKind() const {
278 return Kind == Register;
279 }
280
281 bool isReg() const override {
282 return isRegKind() && !hasModifiers();
283 }
284
285 bool isRegOrInline(unsigned RCID, MVT type) const {
286 return isRegClass(RCID) || isInlinableImm(type);
287 }
288
289 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
290 return isRegOrInline(RCID, type) || isLiteralImm(type);
291 }
292
293 bool isRegOrImmWithInt16InputMods() const {
294 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
295 }
296
 297 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
 298 return isRegOrImmWithInputMods(
 299 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
 300 }
301
302 bool isRegOrImmWithInt32InputMods() const {
303 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
304 }
305
306 bool isRegOrInlineImmWithInt16InputMods() const {
307 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
308 }
309
310 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
311 return isRegOrInline(
312 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
313 }
314
315 bool isRegOrInlineImmWithInt32InputMods() const {
316 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
317 }
318
319 bool isRegOrImmWithInt64InputMods() const {
320 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
321 }
322
323 bool isRegOrImmWithFP16InputMods() const {
324 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
325 }
326
 327 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
 328 return isRegOrImmWithInputMods(
 329 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
 330 }
331
332 bool isRegOrImmWithFP32InputMods() const {
333 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
334 }
335
336 bool isRegOrImmWithFP64InputMods() const {
337 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
338 }
339
340 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
341 return isRegOrInline(
342 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
343 }
344
345 bool isRegOrInlineImmWithFP32InputMods() const {
346 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
347 }
348
349 bool isRegOrInlineImmWithFP64InputMods() const {
350 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
351 }
352
353 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
354
355 bool isVRegWithFP32InputMods() const {
356 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
357 }
358
359 bool isVRegWithFP64InputMods() const {
360 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
361 }
362
363 bool isPackedFP16InputMods() const {
364 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
365 }
366
367 bool isPackedVGPRFP32InputMods() const {
368 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
369 }
370
371 bool isVReg() const {
372 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
373 isRegClass(AMDGPU::VReg_64RegClassID) ||
374 isRegClass(AMDGPU::VReg_96RegClassID) ||
375 isRegClass(AMDGPU::VReg_128RegClassID) ||
376 isRegClass(AMDGPU::VReg_160RegClassID) ||
377 isRegClass(AMDGPU::VReg_192RegClassID) ||
378 isRegClass(AMDGPU::VReg_256RegClassID) ||
379 isRegClass(AMDGPU::VReg_512RegClassID) ||
380 isRegClass(AMDGPU::VReg_1024RegClassID);
381 }
382
383 bool isVReg32() const {
384 return isRegClass(AMDGPU::VGPR_32RegClassID);
385 }
386
387 bool isVReg32OrOff() const {
388 return isOff() || isVReg32();
389 }
390
391 bool isNull() const {
392 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
393 }
394
395 bool isVRegWithInputMods() const;
396 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
397 template <bool IsFake16> bool isT16VRegWithInputMods() const;
398
399 bool isSDWAOperand(MVT type) const;
400 bool isSDWAFP16Operand() const;
401 bool isSDWAFP32Operand() const;
402 bool isSDWAInt16Operand() const;
403 bool isSDWAInt32Operand() const;
404
405 bool isImmTy(ImmTy ImmT) const {
406 return isImm() && Imm.Type == ImmT;
407 }
408
409 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
410
411 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
412
413 bool isImmModifier() const {
414 return isImm() && Imm.Type != ImmTyNone;
415 }
416
417 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
418 bool isDim() const { return isImmTy(ImmTyDim); }
419 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
420 bool isOff() const { return isImmTy(ImmTyOff); }
421 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
422 bool isOffen() const { return isImmTy(ImmTyOffen); }
423 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
424 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
425 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
426 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
427 bool isGDS() const { return isImmTy(ImmTyGDS); }
428 bool isLDS() const { return isImmTy(ImmTyLDS); }
429 bool isCPol() const { return isImmTy(ImmTyCPol); }
430 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
431 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
432 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
433 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
434 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
435 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
436 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
437 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
438 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
439 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
440 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
441 bool isTFE() const { return isImmTy(ImmTyTFE); }
442 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
443 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
444 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
445 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
446 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
447 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
448 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
449 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
450 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
451 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
452 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
453 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
454 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
455 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
456
457 bool isRegOrImm() const {
458 return isReg() || isImm();
459 }
460
461 bool isRegClass(unsigned RCID) const;
462
463 bool isInlineValue() const;
464
465 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
466 return isRegOrInline(RCID, type) && !hasModifiers();
467 }
468
469 bool isSCSrcB16() const {
470 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
471 }
472
473 bool isSCSrcV2B16() const {
474 return isSCSrcB16();
475 }
476
477 bool isSCSrc_b32() const {
478 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
479 }
480
481 bool isSCSrc_b64() const {
482 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
483 }
484
485 bool isBoolReg() const;
486
487 bool isSCSrcF16() const {
488 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
489 }
490
491 bool isSCSrcV2F16() const {
492 return isSCSrcF16();
493 }
494
495 bool isSCSrcF32() const {
496 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
497 }
498
499 bool isSCSrcF64() const {
500 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
501 }
502
503 bool isSSrc_b32() const {
504 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
505 }
506
507 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
508
509 bool isSSrcV2B16() const {
510 llvm_unreachable("cannot happen");
511 return isSSrc_b16();
512 }
513
514 bool isSSrc_b64() const {
515 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
516 // See isVSrc64().
517 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
518 (((const MCTargetAsmParser *)AsmParser)
519 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
520 isExpr());
521 }
522
523 bool isSSrc_f32() const {
524 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
525 }
526
527 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
528
529 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
530
531 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
532
533 bool isSSrcV2F16() const {
534 llvm_unreachable("cannot happen");
535 return isSSrc_f16();
536 }
537
538 bool isSSrcV2FP32() const {
539 llvm_unreachable("cannot happen");
540 return isSSrc_f32();
541 }
542
543 bool isSCSrcV2FP32() const {
544 llvm_unreachable("cannot happen");
545 return isSCSrcF32();
546 }
547
548 bool isSSrcV2INT32() const {
549 llvm_unreachable("cannot happen");
550 return isSSrc_b32();
551 }
552
553 bool isSCSrcV2INT32() const {
554 llvm_unreachable("cannot happen");
555 return isSCSrc_b32();
556 }
557
558 bool isSSrcOrLds_b32() const {
559 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
560 isLiteralImm(MVT::i32) || isExpr();
561 }
562
563 bool isVCSrc_b32() const {
564 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
565 }
566
567 bool isVCSrc_b32_Lo256() const {
568 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
569 }
570
571 bool isVCSrc_b64_Lo256() const {
572 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
573 }
574
575 bool isVCSrc_b64() const {
576 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
577 }
578
579 bool isVCSrcT_b16() const {
580 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
581 }
582
583 bool isVCSrcTB16_Lo128() const {
584 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
585 }
586
587 bool isVCSrcFake16B16_Lo128() const {
588 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
589 }
590
591 bool isVCSrc_b16() const {
592 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
593 }
594
595 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
596
597 bool isVCSrc_f32() const {
598 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
599 }
600
601 bool isVCSrc_f64() const {
602 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
603 }
604
605 bool isVCSrcTBF16() const {
606 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
607 }
608
609 bool isVCSrcT_f16() const {
610 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
611 }
612
613 bool isVCSrcT_bf16() const {
614 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
615 }
616
617 bool isVCSrcTBF16_Lo128() const {
618 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
619 }
620
621 bool isVCSrcTF16_Lo128() const {
622 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
623 }
624
625 bool isVCSrcFake16BF16_Lo128() const {
626 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
627 }
628
629 bool isVCSrcFake16F16_Lo128() const {
630 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
631 }
632
633 bool isVCSrc_bf16() const {
634 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
635 }
636
637 bool isVCSrc_f16() const {
638 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
639 }
640
641 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
642
643 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
644
645 bool isVSrc_b32() const {
646 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
647 }
648
649 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
650
651 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
652
653 bool isVSrcT_b16_Lo128() const {
654 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
655 }
656
657 bool isVSrcFake16_b16_Lo128() const {
658 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
659 }
660
661 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
662
663 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
664
665 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
666
667 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
668
669 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
670
671 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
672
673 bool isVSrc_f32() const {
674 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
675 }
676
677 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
678
679 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
680
681 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
682
683 bool isVSrcT_bf16_Lo128() const {
684 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
685 }
686
687 bool isVSrcT_f16_Lo128() const {
688 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
689 }
690
691 bool isVSrcFake16_bf16_Lo128() const {
692 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
693 }
694
695 bool isVSrcFake16_f16_Lo128() const {
696 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
697 }
698
699 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
700
701 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
702
703 bool isVSrc_v2bf16() const {
704 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
705 }
706
707 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
708
709 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
710
711 bool isVISrcB32() const {
712 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
713 }
714
715 bool isVISrcB16() const {
716 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
717 }
718
719 bool isVISrcV2B16() const {
720 return isVISrcB16();
721 }
722
723 bool isVISrcF32() const {
724 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
725 }
726
727 bool isVISrcF16() const {
728 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
729 }
730
731 bool isVISrcV2F16() const {
732 return isVISrcF16() || isVISrcB32();
733 }
734
735 bool isVISrc_64_bf16() const {
736 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
737 }
738
739 bool isVISrc_64_f16() const {
740 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
741 }
742
743 bool isVISrc_64_b32() const {
744 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
745 }
746
747 bool isVISrc_64B64() const {
748 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
749 }
750
751 bool isVISrc_64_f64() const {
752 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
753 }
754
755 bool isVISrc_64V2FP32() const {
756 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
757 }
758
759 bool isVISrc_64V2INT32() const {
760 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
761 }
762
763 bool isVISrc_256_b32() const {
764 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
765 }
766
767 bool isVISrc_256_f32() const {
768 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
769 }
770
771 bool isVISrc_256B64() const {
772 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
773 }
774
775 bool isVISrc_256_f64() const {
776 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
777 }
778
779 bool isVISrc_512_f64() const {
780 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
781 }
782
783 bool isVISrc_128B16() const {
784 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
785 }
786
787 bool isVISrc_128V2B16() const {
788 return isVISrc_128B16();
789 }
790
791 bool isVISrc_128_b32() const {
792 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
793 }
794
795 bool isVISrc_128_f32() const {
796 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
797 }
798
799 bool isVISrc_256V2FP32() const {
800 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
801 }
802
803 bool isVISrc_256V2INT32() const {
804 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
805 }
806
807 bool isVISrc_512_b32() const {
808 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
809 }
810
811 bool isVISrc_512B16() const {
812 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
813 }
814
815 bool isVISrc_512V2B16() const {
816 return isVISrc_512B16();
817 }
818
819 bool isVISrc_512_f32() const {
820 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
821 }
822
823 bool isVISrc_512F16() const {
824 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
825 }
826
827 bool isVISrc_512V2F16() const {
828 return isVISrc_512F16() || isVISrc_512_b32();
829 }
830
831 bool isVISrc_1024_b32() const {
832 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
833 }
834
835 bool isVISrc_1024B16() const {
836 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
837 }
838
839 bool isVISrc_1024V2B16() const {
840 return isVISrc_1024B16();
841 }
842
843 bool isVISrc_1024_f32() const {
844 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
845 }
846
847 bool isVISrc_1024F16() const {
848 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
849 }
850
851 bool isVISrc_1024V2F16() const {
852 return isVISrc_1024F16() || isVISrc_1024_b32();
853 }
854
855 bool isAISrcB32() const {
856 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
857 }
858
859 bool isAISrcB16() const {
860 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
861 }
862
863 bool isAISrcV2B16() const {
864 return isAISrcB16();
865 }
866
867 bool isAISrcF32() const {
868 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
869 }
870
871 bool isAISrcF16() const {
872 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
873 }
874
875 bool isAISrcV2F16() const {
876 return isAISrcF16() || isAISrcB32();
877 }
878
879 bool isAISrc_64B64() const {
880 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
881 }
882
883 bool isAISrc_64_f64() const {
884 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
885 }
886
887 bool isAISrc_128_b32() const {
888 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
889 }
890
891 bool isAISrc_128B16() const {
892 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
893 }
894
895 bool isAISrc_128V2B16() const {
896 return isAISrc_128B16();
897 }
898
899 bool isAISrc_128_f32() const {
900 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
901 }
902
903 bool isAISrc_128F16() const {
904 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
905 }
906
907 bool isAISrc_128V2F16() const {
908 return isAISrc_128F16() || isAISrc_128_b32();
909 }
910
911 bool isVISrc_128_bf16() const {
912 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
913 }
914
915 bool isVISrc_128_f16() const {
916 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
917 }
918
919 bool isVISrc_128V2F16() const {
920 return isVISrc_128_f16() || isVISrc_128_b32();
921 }
922
923 bool isAISrc_256B64() const {
924 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
925 }
926
927 bool isAISrc_256_f64() const {
928 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
929 }
930
931 bool isAISrc_512_b32() const {
932 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
933 }
934
935 bool isAISrc_512B16() const {
936 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
937 }
938
939 bool isAISrc_512V2B16() const {
940 return isAISrc_512B16();
941 }
942
943 bool isAISrc_512_f32() const {
944 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
945 }
946
947 bool isAISrc_512F16() const {
948 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
949 }
950
951 bool isAISrc_512V2F16() const {
952 return isAISrc_512F16() || isAISrc_512_b32();
953 }
954
955 bool isAISrc_1024_b32() const {
956 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
957 }
958
959 bool isAISrc_1024B16() const {
960 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
961 }
962
963 bool isAISrc_1024V2B16() const {
964 return isAISrc_1024B16();
965 }
966
967 bool isAISrc_1024_f32() const {
968 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
969 }
970
971 bool isAISrc_1024F16() const {
972 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
973 }
974
975 bool isAISrc_1024V2F16() const {
976 return isAISrc_1024F16() || isAISrc_1024_b32();
977 }
978
979 bool isKImmFP32() const {
980 return isLiteralImm(MVT::f32);
981 }
982
983 bool isKImmFP16() const {
984 return isLiteralImm(MVT::f16);
985 }
986
987 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
988
989 bool isMem() const override {
990 return false;
991 }
992
993 bool isExpr() const {
994 return Kind == Expression;
995 }
996
997 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
998
999 bool isSWaitCnt() const;
1000 bool isDepCtr() const;
1001 bool isSDelayALU() const;
1002 bool isHwreg() const;
1003 bool isSendMsg() const;
1004 bool isSplitBarrier() const;
1005 bool isSwizzle() const;
1006 bool isSMRDOffset8() const;
1007 bool isSMEMOffset() const;
1008 bool isSMRDLiteralOffset() const;
1009 bool isDPP8() const;
1010 bool isDPPCtrl() const;
1011 bool isBLGP() const;
1012 bool isGPRIdxMode() const;
1013 bool isS16Imm() const;
1014 bool isU16Imm() const;
1015 bool isEndpgm() const;
1016
1017 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
1018 return [this, P]() { return P(*this); };
1019 }
1020
1021 StringRef getToken() const {
1022 assert(isToken());
1023 return StringRef(Tok.Data, Tok.Length);
1024 }
1025
1026 int64_t getImm() const {
1027 assert(isImm());
1028 return Imm.Val;
1029 }
1030
1031 void setImm(int64_t Val) {
1032 assert(isImm());
1033 Imm.Val = Val;
1034 }
1035
1036 ImmTy getImmTy() const {
1037 assert(isImm());
1038 return Imm.Type;
1039 }
1040
1041 MCRegister getReg() const override {
1042 assert(isRegKind());
1043 return Reg.RegNo;
1044 }
1045
1046 SMLoc getStartLoc() const override {
1047 return StartLoc;
1048 }
1049
1050 SMLoc getEndLoc() const override {
1051 return EndLoc;
1052 }
1053
1054 SMRange getLocRange() const {
1055 return SMRange(StartLoc, EndLoc);
1056 }
1057
1058 Modifiers getModifiers() const {
1059 assert(isRegKind() || isImmTy(ImmTyNone));
1060 return isRegKind() ? Reg.Mods : Imm.Mods;
1061 }
1062
1063 void setModifiers(Modifiers Mods) {
1064 assert(isRegKind() || isImmTy(ImmTyNone));
1065 if (isRegKind())
1066 Reg.Mods = Mods;
1067 else
1068 Imm.Mods = Mods;
1069 }
1070
1071 bool hasModifiers() const {
1072 return getModifiers().hasModifiers();
1073 }
1074
1075 bool hasFPModifiers() const {
1076 return getModifiers().hasFPModifiers();
1077 }
1078
1079 bool hasIntModifiers() const {
1080 return getModifiers().hasIntModifiers();
1081 }
1082
1083 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1084
1085 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1086
1087 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1088
1089 void addRegOperands(MCInst &Inst, unsigned N) const;
1090
1091 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1092 if (isRegKind())
1093 addRegOperands(Inst, N);
1094 else
1095 addImmOperands(Inst, N);
1096 }
1097
1098 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1099 Modifiers Mods = getModifiers();
1100 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1101 if (isRegKind()) {
1102 addRegOperands(Inst, N);
1103 } else {
1104 addImmOperands(Inst, N, false);
1105 }
1106 }
1107
1108 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1109 assert(!hasIntModifiers());
1110 addRegOrImmWithInputModsOperands(Inst, N);
1111 }
1112
1113 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1114 assert(!hasFPModifiers());
1115 addRegOrImmWithInputModsOperands(Inst, N);
1116 }
1117
1118 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1119 Modifiers Mods = getModifiers();
1120 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1121 assert(isRegKind());
1122 addRegOperands(Inst, N);
1123 }
1124
1125 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1126 assert(!hasIntModifiers());
1127 addRegWithInputModsOperands(Inst, N);
1128 }
1129
1130 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1131 assert(!hasFPModifiers());
1132 addRegWithInputModsOperands(Inst, N);
1133 }
1134
1135 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1136 // clang-format off
1137 switch (Type) {
1138 case ImmTyNone: OS << "None"; break;
1139 case ImmTyGDS: OS << "GDS"; break;
1140 case ImmTyLDS: OS << "LDS"; break;
1141 case ImmTyOffen: OS << "Offen"; break;
1142 case ImmTyIdxen: OS << "Idxen"; break;
1143 case ImmTyAddr64: OS << "Addr64"; break;
1144 case ImmTyOffset: OS << "Offset"; break;
1145 case ImmTyInstOffset: OS << "InstOffset"; break;
1146 case ImmTyOffset0: OS << "Offset0"; break;
1147 case ImmTyOffset1: OS << "Offset1"; break;
1148 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1149 case ImmTyCPol: OS << "CPol"; break;
1150 case ImmTyIndexKey8bit: OS << "index_key"; break;
1151 case ImmTyIndexKey16bit: OS << "index_key"; break;
1152 case ImmTyIndexKey32bit: OS << "index_key"; break;
1153 case ImmTyTFE: OS << "TFE"; break;
1154 case ImmTyD16: OS << "D16"; break;
1155 case ImmTyFORMAT: OS << "FORMAT"; break;
1156 case ImmTyClamp: OS << "Clamp"; break;
1157 case ImmTyOModSI: OS << "OModSI"; break;
1158 case ImmTyDPP8: OS << "DPP8"; break;
1159 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1160 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1161 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1162 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1163 case ImmTyDppFI: OS << "DppFI"; break;
1164 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1165 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1166 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1167 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1168 case ImmTyDMask: OS << "DMask"; break;
1169 case ImmTyDim: OS << "Dim"; break;
1170 case ImmTyUNorm: OS << "UNorm"; break;
1171 case ImmTyDA: OS << "DA"; break;
1172 case ImmTyR128A16: OS << "R128A16"; break;
1173 case ImmTyA16: OS << "A16"; break;
1174 case ImmTyLWE: OS << "LWE"; break;
1175 case ImmTyOff: OS << "Off"; break;
1176 case ImmTyExpTgt: OS << "ExpTgt"; break;
1177 case ImmTyExpCompr: OS << "ExpCompr"; break;
1178 case ImmTyExpVM: OS << "ExpVM"; break;
1179 case ImmTyHwreg: OS << "Hwreg"; break;
1180 case ImmTySendMsg: OS << "SendMsg"; break;
1181 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1182 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1183 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1184 case ImmTyOpSel: OS << "OpSel"; break;
1185 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1186 case ImmTyNegLo: OS << "NegLo"; break;
1187 case ImmTyNegHi: OS << "NegHi"; break;
1188 case ImmTySwizzle: OS << "Swizzle"; break;
1189 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1190 case ImmTyHigh: OS << "High"; break;
1191 case ImmTyBLGP: OS << "BLGP"; break;
1192 case ImmTyCBSZ: OS << "CBSZ"; break;
1193 case ImmTyABID: OS << "ABID"; break;
1194 case ImmTyEndpgm: OS << "Endpgm"; break;
1195 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1196 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1197 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1198 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1199 case ImmTyBitOp3: OS << "BitOp3"; break;
1200 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1201 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1202 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1203 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1204 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1205 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1206 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1207 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1208 case ImmTyScaleSel: OS << "ScaleSel" ; break;
1209 case ImmTyByteSel: OS << "ByteSel" ; break;
1210 }
1211 // clang-format on
1212 }
1213
1214 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1215 switch (Kind) {
1216 case Register:
1217 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1218 << " mods: " << Reg.Mods << '>';
1219 break;
1220 case Immediate:
1221 OS << '<' << getImm();
1222 if (getImmTy() != ImmTyNone) {
1223 OS << " type: "; printImmTy(OS, getImmTy());
1224 }
1225 OS << " mods: " << Imm.Mods << '>';
1226 break;
1227 case Token:
1228 OS << '\'' << getToken() << '\'';
1229 break;
1230 case Expression:
1231 OS << "<expr ";
1232 MAI.printExpr(OS, *Expr);
1233 OS << '>';
1234 break;
1235 }
1236 }
1237
1238 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1239 int64_t Val, SMLoc Loc,
1240 ImmTy Type = ImmTyNone,
1241 bool IsFPImm = false) {
1242 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1243 Op->Imm.Val = Val;
1244 Op->Imm.IsFPImm = IsFPImm;
1245 Op->Imm.Kind = ImmKindTyNone;
1246 Op->Imm.Type = Type;
1247 Op->Imm.Mods = Modifiers();
1248 Op->StartLoc = Loc;
1249 Op->EndLoc = Loc;
1250 return Op;
1251 }
1252
1253 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1254 StringRef Str, SMLoc Loc,
1255 bool HasExplicitEncodingSize = true) {
1256 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1257 Res->Tok.Data = Str.data();
1258 Res->Tok.Length = Str.size();
1259 Res->StartLoc = Loc;
1260 Res->EndLoc = Loc;
1261 return Res;
1262 }
1263
1264 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1265 MCRegister Reg, SMLoc S, SMLoc E) {
1266 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1267 Op->Reg.RegNo = Reg;
1268 Op->Reg.Mods = Modifiers();
1269 Op->StartLoc = S;
1270 Op->EndLoc = E;
1271 return Op;
1272 }
1273
1274 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1275 const class MCExpr *Expr, SMLoc S) {
1276 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1277 Op->Expr = Expr;
1278 Op->StartLoc = S;
1279 Op->EndLoc = S;
1280 return Op;
1281 }
1282};
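// Illustrative usage sketch (hypothetical call site, following the pattern
// used by this parser's helpers): operands are materialized through the
// factory methods above, e.g.
//   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
//                                               AMDGPUOperand::ImmTyOffset));
// which records the value, its immediate type and its source location for
// later diagnostics.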
1283
1284raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1285 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1286 return OS;
1287}
1288
1289//===----------------------------------------------------------------------===//
1290// AsmParser
1291//===----------------------------------------------------------------------===//
1292
1293// Holds info related to the current kernel, e.g. count of SGPRs used.
1294// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1295// .amdgpu_hsa_kernel or at EOF.
1296class KernelScopeInfo {
1297 int SgprIndexUnusedMin = -1;
1298 int VgprIndexUnusedMin = -1;
1299 int AgprIndexUnusedMin = -1;
1300 MCContext *Ctx = nullptr;
1301 MCSubtargetInfo const *MSTI = nullptr;
1302
1303 void usesSgprAt(int i) {
1304 if (i >= SgprIndexUnusedMin) {
1305 SgprIndexUnusedMin = ++i;
1306 if (Ctx) {
1307 MCSymbol* const Sym =
1308 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1309 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1310 }
1311 }
1312 }
1313
1314 void usesVgprAt(int i) {
1315 if (i >= VgprIndexUnusedMin) {
1316 VgprIndexUnusedMin = ++i;
1317 if (Ctx) {
1318 MCSymbol* const Sym =
1319 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1320 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1321 VgprIndexUnusedMin);
1322 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1323 }
1324 }
1325 }
1326
1327 void usesAgprAt(int i) {
1328 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1329 if (!hasMAIInsts(*MSTI))
1330 return;
1331
1332 if (i >= AgprIndexUnusedMin) {
1333 AgprIndexUnusedMin = ++i;
1334 if (Ctx) {
1335 MCSymbol* const Sym =
1336 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1337 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1338
1339 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1340 MCSymbol* const vSym =
1341 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1342 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1343 VgprIndexUnusedMin);
1344 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1345 }
1346 }
1347 }
1348
1349public:
1350 KernelScopeInfo() = default;
1351
1352 void initialize(MCContext &Context) {
1353 Ctx = &Context;
1354 MSTI = Ctx->getSubtargetInfo();
1355
1356 usesSgprAt(SgprIndexUnusedMin = -1);
1357 usesVgprAt(VgprIndexUnusedMin = -1);
1358 if (hasMAIInsts(*MSTI)) {
1359 usesAgprAt(AgprIndexUnusedMin = -1);
1360 }
1361 }
1362
1363 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1364 unsigned RegWidth) {
1365 switch (RegKind) {
1366 case IS_SGPR:
1367 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1368 break;
1369 case IS_AGPR:
1370 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1371 break;
1372 case IS_VGPR:
1373 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1374 break;
1375 default:
1376 break;
1377 }
1378 }
1379};
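// For illustration (assumed example, not from the listing): after parsing
// "v_mov_b32 v7, s3", usesRegister() is called with (IS_VGPR, 7, 32) and
// (IS_SGPR, 3, 32); on a target where AGPRs do not share the VGPR budget this
// raises .kernel.vgpr_count to 8 and .kernel.sgpr_count to 4 (register
// indices are zero-based, the symbols hold counts).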
1380
1381class AMDGPUAsmParser : public MCTargetAsmParser {
1382 MCAsmParser &Parser;
1383
1384 unsigned ForcedEncodingSize = 0;
1385 bool ForcedDPP = false;
1386 bool ForcedSDWA = false;
1387 KernelScopeInfo KernelScope;
1388
1389 /// @name Auto-generated Match Functions
1390 /// {
1391
1392#define GET_ASSEMBLER_HEADER
1393#include "AMDGPUGenAsmMatcher.inc"
1394
1395 /// }
1396
1397private:
1398 void createConstantSymbol(StringRef Id, int64_t Val);
1399
1400 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1401 bool OutOfRangeError(SMRange Range);
1402 /// Calculate VGPR/SGPR blocks required for given target, reserved
1403 /// registers, and user-specified NextFreeXGPR values.
1404 ///
1405 /// \param Features [in] Target features, used for bug corrections.
1406 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1407 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1408 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1409 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1410 /// descriptor field, if valid.
1411 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1412 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1413 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1414 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1415 /// \param VGPRBlocks [out] Result VGPR block count.
1416 /// \param SGPRBlocks [out] Result SGPR block count.
1417 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1418 const MCExpr *FlatScrUsed, bool XNACKUsed,
1419 std::optional<bool> EnableWavefrontSize32,
1420 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1421 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1422 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1423 bool ParseDirectiveAMDGCNTarget();
1424 bool ParseDirectiveAMDHSACodeObjectVersion();
1425 bool ParseDirectiveAMDHSAKernel();
1426 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1427 bool ParseDirectiveAMDKernelCodeT();
1428 // TODO: Possibly make subtargetHasRegister const.
1429 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1430 bool ParseDirectiveAMDGPUHsaKernel();
1431
1432 bool ParseDirectiveISAVersion();
1433 bool ParseDirectiveHSAMetadata();
1434 bool ParseDirectivePALMetadataBegin();
1435 bool ParseDirectivePALMetadata();
1436 bool ParseDirectiveAMDGPULDS();
1437
1438 /// Common code to parse out a block of text (typically YAML) between start and
1439 /// end directives.
1440 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1441 const char *AssemblerDirectiveEnd,
1442 std::string &CollectString);
1443
1444 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1445 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1446 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1447 unsigned &RegNum, unsigned &RegWidth,
1448 bool RestoreOnFailure = false);
 1449 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
 1450 unsigned &RegNum, unsigned &RegWidth,
 1451 SmallVectorImpl<AsmToken> &Tokens);
 1452 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
 1453 unsigned &RegWidth,
 1454 SmallVectorImpl<AsmToken> &Tokens);
 1455 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
 1456 unsigned &RegWidth,
 1457 SmallVectorImpl<AsmToken> &Tokens);
 1458 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
 1459 unsigned &RegWidth,
 1460 SmallVectorImpl<AsmToken> &Tokens);
1461 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1462 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1463 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1464
1465 bool isRegister();
1466 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1467 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1468 void initializeGprCountSymbol(RegisterKind RegKind);
1469 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1470 unsigned RegWidth);
1471 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1472 bool IsAtomic);
1473
1474public:
1475 enum OperandMode {
1476 OperandMode_Default,
1477 OperandMode_NSA,
1478 };
1479
1480 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1481
1482 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1483 const MCInstrInfo &MII,
1484 const MCTargetOptions &Options)
 1485 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
 1486 MCAsmParserExtension::Initialize(Parser);
 1487
1488 if (getFeatureBits().none()) {
1489 // Set default features.
1490 copySTI().ToggleFeature("southern-islands");
1491 }
1492
1493 FeatureBitset FB = getFeatureBits();
1494 if (!FB[AMDGPU::FeatureWavefrontSize64] &&
1495 !FB[AMDGPU::FeatureWavefrontSize32]) {
1496 // If there is no default wave size it must be a generation before gfx10,
1497 // these have FeatureWavefrontSize64 in their definition already. For
1498 // gfx10+ set wave32 as a default.
1499 copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
1500 }
1501
1502 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1503
1504 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1505 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1506 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1507 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1508 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1509 } else {
1510 createConstantSymbol(".option.machine_version_major", ISA.Major);
1511 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1512 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1513 }
1514 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1515 initializeGprCountSymbol(IS_VGPR);
1516 initializeGprCountSymbol(IS_SGPR);
1517 } else
1518 KernelScope.initialize(getContext());
1519
1520 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1521 createConstantSymbol(Symbol, Code);
1522
1523 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1524 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1525 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1526 }
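// Illustrative note (assumed example): assembling for gfx90a under the HSA
// ABI defines .amdgcn.gfx_generation_number = 9, .amdgcn.gfx_generation_minor
// = 0 and .amdgcn.gfx_generation_stepping = 10, so assembly sources can test
// the target with .if directives against these symbols.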
1527
1528 bool hasMIMG_R128() const {
1529 return AMDGPU::hasMIMG_R128(getSTI());
1530 }
1531
1532 bool hasPackedD16() const {
1533 return AMDGPU::hasPackedD16(getSTI());
1534 }
1535
1536 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1537
1538 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1539
1540 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1541
1542 bool isSI() const {
1543 return AMDGPU::isSI(getSTI());
1544 }
1545
1546 bool isCI() const {
1547 return AMDGPU::isCI(getSTI());
1548 }
1549
1550 bool isVI() const {
1551 return AMDGPU::isVI(getSTI());
1552 }
1553
1554 bool isGFX9() const {
1555 return AMDGPU::isGFX9(getSTI());
1556 }
1557
1558 // TODO: isGFX90A is also true for GFX940. We need to clean it.
1559 bool isGFX90A() const {
1560 return AMDGPU::isGFX90A(getSTI());
1561 }
1562
1563 bool isGFX940() const {
1564 return AMDGPU::isGFX940(getSTI());
1565 }
1566
1567 bool isGFX9Plus() const {
1568 return AMDGPU::isGFX9Plus(getSTI());
1569 }
1570
1571 bool isGFX10() const {
1572 return AMDGPU::isGFX10(getSTI());
1573 }
1574
1575 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1576
1577 bool isGFX11() const {
1578 return AMDGPU::isGFX11(getSTI());
1579 }
1580
1581 bool isGFX11Plus() const {
1582 return AMDGPU::isGFX11Plus(getSTI());
1583 }
1584
1585 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1586
1587 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1588
1589 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1590
1591 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1592
1593 bool isGFX10_BEncoding() const {
1594 return AMDGPU::isGFX10_BEncoding(getSTI());
1595 }
1596
1597 bool hasInv2PiInlineImm() const {
1598 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1599 }
1600
1601 bool has64BitLiterals() const {
1602 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1603 }
1604
1605 bool hasFlatOffsets() const {
1606 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1607 }
1608
1609 bool hasTrue16Insts() const {
1610 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1611 }
1612
1613 bool hasArchitectedFlatScratch() const {
1614 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1615 }
1616
1617 bool hasSGPR102_SGPR103() const {
1618 return !isVI() && !isGFX9();
1619 }
1620
1621 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1622
1623 bool hasIntClamp() const {
1624 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1625 }
1626
1627 bool hasPartialNSAEncoding() const {
1628 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1629 }
1630
1631 bool hasGloballyAddressableScratch() const {
1632 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1633 }
1634
1635 unsigned getNSAMaxSize(bool HasSampler = false) const {
1636 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1637 }
1638
1639 unsigned getMaxNumUserSGPRs() const {
1640 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1641 }
1642
1643 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1644
1645 AMDGPUTargetStreamer &getTargetStreamer() {
1646 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1647 return static_cast<AMDGPUTargetStreamer &>(TS);
1648 }
1649
1650 const MCRegisterInfo *getMRI() const {
1651 // We need this const_cast because for some reason getContext() is not const
1652 // in MCAsmParser.
1653 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1654 }
1655
1656 const MCInstrInfo *getMII() const {
1657 return &MII;
1658 }
1659
1660 const FeatureBitset &getFeatureBits() const {
1661 return getSTI().getFeatureBits();
1662 }
1663
1664 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1665 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1666 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1667
1668 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1669 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1670 bool isForcedDPP() const { return ForcedDPP; }
1671 bool isForcedSDWA() const { return ForcedSDWA; }
1672 ArrayRef<unsigned> getMatchedVariants() const;
1673 StringRef getMatchedVariantName() const;
1674
1675 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1676 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1677 bool RestoreOnFailure);
1678 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1679 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1680 SMLoc &EndLoc) override;
1681 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1682 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1683 unsigned Kind) override;
1684 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1685 OperandVector &Operands, MCStreamer &Out,
1686 uint64_t &ErrorInfo,
1687 bool MatchingInlineAsm) override;
1688 bool ParseDirective(AsmToken DirectiveID) override;
1689 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1690 OperandMode Mode = OperandMode_Default);
1691 StringRef parseMnemonicSuffix(StringRef Name);
1692 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1693 SMLoc NameLoc, OperandVector &Operands) override;
1694 //bool ProcessInstruction(MCInst &Inst);
1695
1696 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1697
1698 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1699
1700 ParseStatus
1701 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1702 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1703 std::function<bool(int64_t &)> ConvertResult = nullptr);
1704
1705 ParseStatus parseOperandArrayWithPrefix(
1706 const char *Prefix, OperandVector &Operands,
1707 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1708 bool (*ConvertResult)(int64_t &) = nullptr);
1709
1710 ParseStatus
1711 parseNamedBit(StringRef Name, OperandVector &Operands,
1712 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1713 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1714 ParseStatus parseCPol(OperandVector &Operands);
1715 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1716 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1717 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1718 SMLoc &StringLoc);
1719 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1720 StringRef Name,
1721 ArrayRef<const char *> Ids,
1722 int64_t &IntVal);
1723 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1724 StringRef Name,
1725 ArrayRef<const char *> Ids,
1726 AMDGPUOperand::ImmTy Type);
1727
1728 bool isModifier();
1729 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1730 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1731 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1732 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1733 bool parseSP3NegModifier();
1734 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1735 bool HasLit = false, bool HasLit64 = false);
1736 ParseStatus parseReg(OperandVector &Operands);
1737 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1738 bool HasLit = false, bool HasLit64 = false);
1739 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1740 bool AllowImm = true);
1741 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1742 bool AllowImm = true);
1743 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1744 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1745 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1746 ParseStatus tryParseIndexKey(OperandVector &Operands,
1747 AMDGPUOperand::ImmTy ImmTy);
1748 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1749 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1750 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1751 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1752 AMDGPUOperand::ImmTy Type);
1753 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1754 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1755 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1756 AMDGPUOperand::ImmTy Type);
1757 ParseStatus parseMatrixAScale(OperandVector &Operands);
1758 ParseStatus parseMatrixBScale(OperandVector &Operands);
1759 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1760 AMDGPUOperand::ImmTy Type);
1761 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1762 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1763
1764 ParseStatus parseDfmtNfmt(int64_t &Format);
1765 ParseStatus parseUfmt(int64_t &Format);
1766 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1767 int64_t &Format);
1768 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1769 int64_t &Format);
1770 ParseStatus parseFORMAT(OperandVector &Operands);
1771 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1772 ParseStatus parseNumericFormat(int64_t &Format);
1773 ParseStatus parseFlatOffset(OperandVector &Operands);
1774 ParseStatus parseR128A16(OperandVector &Operands);
1775 ParseStatus parseBLGP(OperandVector &Operands);
1776 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1777 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1778
1779 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1780
1781 bool parseCnt(int64_t &IntVal);
1782 ParseStatus parseSWaitCnt(OperandVector &Operands);
1783
1784 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1785 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1786 ParseStatus parseDepCtr(OperandVector &Operands);
1787
1788 bool parseDelay(int64_t &Delay);
1789 ParseStatus parseSDelayALU(OperandVector &Operands);
1790
1791 ParseStatus parseHwreg(OperandVector &Operands);
1792
1793private:
1794 struct OperandInfoTy {
1795 SMLoc Loc;
1796 int64_t Val;
1797 bool IsSymbolic = false;
1798 bool IsDefined = false;
1799
1800 OperandInfoTy(int64_t Val) : Val(Val) {}
1801 };
1802
1803 struct StructuredOpField : OperandInfoTy {
1804 StringLiteral Id;
1805 StringLiteral Desc;
1806 unsigned Width;
1807 bool IsDefined = false;
1808
1809 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1810 int64_t Default)
1811 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1812 virtual ~StructuredOpField() = default;
1813
1814 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1815 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1816 return false;
1817 }
1818
1819 virtual bool validate(AMDGPUAsmParser &Parser) const {
1820 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1821 return Error(Parser, "not supported on this GPU");
1822 if (!isUIntN(Width, Val))
1823 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1824 return true;
1825 }
1826 };
1827
1828 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1829 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1830
1831 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1832 bool validateSendMsg(const OperandInfoTy &Msg,
1833 const OperandInfoTy &Op,
1834 const OperandInfoTy &Stream);
1835
1836 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1837 OperandInfoTy &Width);
1838
1839 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1840 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1841 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1842
1843 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1844 const OperandVector &Operands) const;
1845 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1846 SMLoc getRegLoc(MCRegister Reg, const OperandVector &Operands) const;
1847 SMLoc getLitLoc(const OperandVector &Operands,
1848 bool SearchMandatoryLiterals = false) const;
1849 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1850 SMLoc getConstLoc(const OperandVector &Operands) const;
1851 SMLoc getInstLoc(const OperandVector &Operands) const;
1852
1853 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1854 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1855 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1856 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1857 bool validateSOPLiteral(const MCInst &Inst) const;
1858 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1859 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1860 bool AsVOPD3);
1861 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1862 bool tryVOPD(const MCInst &Inst);
1863 bool tryVOPD3(const MCInst &Inst);
1864 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1865
1866 bool validateIntClampSupported(const MCInst &Inst);
1867 bool validateMIMGAtomicDMask(const MCInst &Inst);
1868 bool validateMIMGGatherDMask(const MCInst &Inst);
1869 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1870 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1871 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1872 bool validateMIMGD16(const MCInst &Inst);
1873 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1874 bool validateTensorR128(const MCInst &Inst);
1875 bool validateMIMGMSAA(const MCInst &Inst);
1876 bool validateOpSel(const MCInst &Inst);
1877 bool validateTrue16OpSel(const MCInst &Inst);
1878 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1879 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1880 bool validateVccOperand(MCRegister Reg) const;
1881 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1882 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1883 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1884 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1885 bool validateAGPRLdSt(const MCInst &Inst) const;
1886 bool validateVGPRAlign(const MCInst &Inst) const;
1887 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1888 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1889 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1890 bool validateDivScale(const MCInst &Inst);
1891 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1892 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1893 const SMLoc &IDLoc);
1894 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1895 const unsigned CPol);
1896 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1897 bool validateSetVgprMSB(const MCInst &Inst, const OperandVector &Operands);
1898 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1899 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1900 unsigned getConstantBusLimit(unsigned Opcode) const;
1901 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1902 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1903 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1904
1905 bool isSupportedMnemo(StringRef Mnemo,
1906 const FeatureBitset &FBS);
1907 bool isSupportedMnemo(StringRef Mnemo,
1908 const FeatureBitset &FBS,
1909 ArrayRef<unsigned> Variants);
1910 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1911
1912 bool isId(const StringRef Id) const;
1913 bool isId(const AsmToken &Token, const StringRef Id) const;
1914 bool isToken(const AsmToken::TokenKind Kind) const;
1915 StringRef getId() const;
1916 bool trySkipId(const StringRef Id);
1917 bool trySkipId(const StringRef Pref, const StringRef Id);
1918 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1919 bool trySkipToken(const AsmToken::TokenKind Kind);
1920 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1921 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1922 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1923
1924 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1925 AsmToken::TokenKind getTokenKind() const;
1926 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1928 StringRef getTokenStr() const;
1929 AsmToken peekToken(bool ShouldSkipSpace = true);
1930 AsmToken getToken() const;
1931 SMLoc getLoc() const;
1932 void lex();
1933
1934public:
1935 void onBeginOfFile() override;
1936 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1937
1938 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1939
1940 ParseStatus parseExpTgt(OperandVector &Operands);
1941 ParseStatus parseSendMsg(OperandVector &Operands);
1942 ParseStatus parseInterpSlot(OperandVector &Operands);
1943 ParseStatus parseInterpAttr(OperandVector &Operands);
1944 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1945 ParseStatus parseBoolReg(OperandVector &Operands);
1946
1947 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1948 const unsigned MaxVal, const Twine &ErrMsg,
1949 SMLoc &Loc);
1950 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1951 const unsigned MinVal,
1952 const unsigned MaxVal,
1953 const StringRef ErrMsg);
1954 ParseStatus parseSwizzle(OperandVector &Operands);
1955 bool parseSwizzleOffset(int64_t &Imm);
1956 bool parseSwizzleMacro(int64_t &Imm);
1957 bool parseSwizzleQuadPerm(int64_t &Imm);
1958 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1959 bool parseSwizzleBroadcast(int64_t &Imm);
1960 bool parseSwizzleSwap(int64_t &Imm);
1961 bool parseSwizzleReverse(int64_t &Imm);
1962 bool parseSwizzleFFT(int64_t &Imm);
1963 bool parseSwizzleRotate(int64_t &Imm);
1964
1965 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1966 int64_t parseGPRIdxMacro();
1967
1968 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1969 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1970
1971 ParseStatus parseOModSI(OperandVector &Operands);
1972
1973 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1974 OptionalImmIndexMap &OptionalIdx);
1975 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1976 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1977 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1978 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1979 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1980
1981 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1982 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1983 OptionalImmIndexMap &OptionalIdx);
1984 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1985 OptionalImmIndexMap &OptionalIdx);
1986
1987 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1988 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1989
1990 bool parseDimId(unsigned &Encoding);
1991 ParseStatus parseDim(OperandVector &Operands);
1992 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1993 ParseStatus parseDPP8(OperandVector &Operands);
1994 ParseStatus parseDPPCtrl(OperandVector &Operands);
1995 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1996 int64_t parseDPPCtrlSel(StringRef Ctrl);
1997 int64_t parseDPPCtrlPerm();
1998 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1999 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
2000 cvtDPP(Inst, Operands, true);
2001 }
2002 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
2003 bool IsDPP8 = false);
2004 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
2005 cvtVOP3DPP(Inst, Operands, true);
2006 }
2007
2008 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
2009 AMDGPUOperand::ImmTy Type);
2010 ParseStatus parseSDWADstUnused(OperandVector &Operands);
2011 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
2012 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
2013 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
2014 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
2015 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
2016 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
2017 uint64_t BasicInstType,
2018 bool SkipDstVcc = false,
2019 bool SkipSrcVcc = false);
2020
2021 ParseStatus parseEndpgm(OperandVector &Operands);
2022
2023 ParseStatus parseVOPD(OperandVector &Operands);
2024};
2025
2026} // end anonymous namespace
2027
2028// May be called with integer type with equivalent bitwidth.
2029static const fltSemantics *getFltSemantics(unsigned Size) {
2030 switch (Size) {
2031 case 4:
2032 return &APFloat::IEEEsingle();
2033 case 8:
2034 return &APFloat::IEEEdouble();
2035 case 2:
2036 return &APFloat::IEEEhalf();
2037 default:
2038 llvm_unreachable("unsupported fp type");
2039 }
2040}
2041
2042 static const fltSemantics *getFltSemantics(MVT VT) {
2043   return getFltSemantics(VT.getSizeInBits() / 8);
2044}
2045
2046 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
2047   switch (OperandType) {
2048   // When a floating-point immediate is used as an operand of type i16, the
2049   // 32-bit representation of the constant truncated to the 16 LSBs should be used.
2064 return &APFloat::IEEEsingle();
2071 return &APFloat::IEEEdouble();
2078 return &APFloat::IEEEhalf();
2083 return &APFloat::BFloat();
2084 default:
2085 llvm_unreachable("unsupported fp type");
2086 }
2087}
2088
2089//===----------------------------------------------------------------------===//
2090// Operand
2091//===----------------------------------------------------------------------===//
2092
2093static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2094 bool Lost;
2095
2096   // Convert literal to single precision
2097   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
2098                                                APFloat::rmNearestTiesToEven,
2099                                                &Lost);
2100   // We allow precision loss but not overflow or underflow
2101 if (Status != APFloat::opOK &&
2102 Lost &&
2103 ((Status & APFloat::opOverflow) != 0 ||
2104 (Status & APFloat::opUnderflow) != 0)) {
2105 return false;
2106 }
2107
2108 return true;
2109}
2110
2111static bool isSafeTruncation(int64_t Val, unsigned Size) {
2112 return isUIntN(Size, Val) || isIntN(Size, Val);
2113}
2114
2115static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2116 if (VT.getScalarType() == MVT::i16)
2117 return isInlinableLiteral32(Val, HasInv2Pi);
2118
2119 if (VT.getScalarType() == MVT::f16)
2120 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2121
2122 assert(VT.getScalarType() == MVT::bf16);
2123
2124 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2125}
2126
2127bool AMDGPUOperand::isInlinableImm(MVT type) const {
2128
2129 // This is a hack to enable named inline values like
2130 // shared_base with both 32-bit and 64-bit operands.
2131 // Note that these values are defined as
2132 // 32-bit operands only.
2133 if (isInlineValue()) {
2134 return true;
2135 }
2136
2137 if (!isImmTy(ImmTyNone)) {
2138 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2139 return false;
2140 }
2141 // TODO: We should avoid using host float here. It would be better to
2142 // check the float bit values which is what a few other places do.
2143 // We've had bot failures before due to weird NaN support on mips hosts.
2144
2145 APInt Literal(64, Imm.Val);
2146
2147 if (Imm.IsFPImm) { // We got fp literal token
2148 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2149       return AMDGPU::isInlinableLiteral64(Imm.Val,
2150                                           AsmParser->hasInv2PiInlineImm());
2151 }
2152
2153 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2154 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2155 return false;
2156
2157 if (type.getScalarSizeInBits() == 16) {
2158 bool Lost = false;
2159 switch (type.getScalarType().SimpleTy) {
2160 default:
2161 llvm_unreachable("unknown 16-bit type");
2162 case MVT::bf16:
2163 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2164 &Lost);
2165 break;
2166 case MVT::f16:
2167 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2168 &Lost);
2169 break;
2170 case MVT::i16:
2171 FPLiteral.convert(APFloatBase::IEEEsingle(),
2172 APFloat::rmNearestTiesToEven, &Lost);
2173 break;
2174 }
2175       // We need to use the 32-bit representation here because when a
2176       // floating-point inline constant is used as an i16 operand, its 32-bit
2177       // representation will be used. We need the 32-bit value to check whether
2178       // it is an FP inline constant.
2179 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2180 return isInlineableLiteralOp16(ImmVal, type,
2181 AsmParser->hasInv2PiInlineImm());
2182 }
2183
2184 // Check if single precision literal is inlinable
2185     return AMDGPU::isInlinableLiteral32(
2186         static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2187 AsmParser->hasInv2PiInlineImm());
2188 }
2189
2190 // We got int literal token.
2191 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2192     return AMDGPU::isInlinableLiteral64(Imm.Val,
2193                                         AsmParser->hasInv2PiInlineImm());
2194 }
2195
2196 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2197 return false;
2198 }
2199
2200 if (type.getScalarSizeInBits() == 16) {
2201     return isInlineableLiteralOp16(
2202         static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2203 type, AsmParser->hasInv2PiInlineImm());
2204 }
2205
2206   return AMDGPU::isInlinableLiteral32(
2207       static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2208 AsmParser->hasInv2PiInlineImm());
2209}
2210
2211bool AMDGPUOperand::isLiteralImm(MVT type) const {
2212 // Check that this immediate can be added as literal
2213 if (!isImmTy(ImmTyNone)) {
2214 return false;
2215 }
2216
2217 bool Allow64Bit =
2218 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2219
2220 if (!Imm.IsFPImm) {
2221 // We got int literal token.
2222
2223 if (type == MVT::f64 && hasFPModifiers()) {
2224 // FP modifiers cannot be applied to integer literals while preserving the
2225 // same semantics for VOP1/2/C and VOP3 because of integer truncation. To
2226 // avoid ambiguity, disallow these cases.
2227 return false;
2228 }
2229
2230 unsigned Size = type.getSizeInBits();
2231 if (Size == 64) {
2232 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2233 return true;
2234 Size = 32;
2235 }
2236
2237 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2238 // types.
2239 return isSafeTruncation(Imm.Val, Size);
2240 }
2241
2242 // We got fp literal token
2243 if (type == MVT::f64) { // Expected 64-bit fp operand
2244 // When encoded, the low 32 bits of the literal would be set to zeroes, but we accept these literals.
2245 return true;
2246 }
2247
2248 if (type == MVT::i64) { // Expected 64-bit int operand
2249 // We don't allow fp literals in 64-bit integer instructions. It is
2250 // unclear how we should encode them.
2251 return false;
2252 }
2253
2254 // We allow fp literals with f16x2 operands assuming that the specified
2255 // literal goes into the lower half and the upper half is zero. We also
2256 // require that the literal may be losslessly converted to f16.
2257 //
2258 // For i16x2 operands, we assume that the specified literal is encoded as a
2259 // single-precision float. This is pretty odd, but it matches SP3 and what
2260 // happens in hardware.
2261 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2262 : (type == MVT::v2i16) ? MVT::f32
2263 : (type == MVT::v2f32) ? MVT::f32
2264 : type;
2265
2266 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2267 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2268}
2269
2270bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2271 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2272}
2273
2274bool AMDGPUOperand::isVRegWithInputMods() const {
2275 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2276 // GFX90A allows DPP on 64-bit operands.
2277 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2278 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2279}
2280
2281template <bool IsFake16>
2282bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2283 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2284 : AMDGPU::VGPR_16_Lo128RegClassID);
2285}
2286
2287template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2288 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2289 : AMDGPU::VGPR_16RegClassID);
2290}
2291
2292bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2293 if (AsmParser->isVI())
2294 return isVReg32();
2295 if (AsmParser->isGFX9Plus())
2296 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2297 return false;
2298}
2299
2300bool AMDGPUOperand::isSDWAFP16Operand() const {
2301 return isSDWAOperand(MVT::f16);
2302}
2303
2304bool AMDGPUOperand::isSDWAFP32Operand() const {
2305 return isSDWAOperand(MVT::f32);
2306}
2307
2308bool AMDGPUOperand::isSDWAInt16Operand() const {
2309 return isSDWAOperand(MVT::i16);
2310}
2311
2312bool AMDGPUOperand::isSDWAInt32Operand() const {
2313 return isSDWAOperand(MVT::i32);
2314}
2315
2316bool AMDGPUOperand::isBoolReg() const {
2317 auto FB = AsmParser->getFeatureBits();
2318 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2319 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2320}
2321
2322uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2323{
2324 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2325 assert(Size == 2 || Size == 4 || Size == 8);
2326
2327 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2328
2329 if (Imm.Mods.Abs) {
2330 Val &= ~FpSignMask;
2331 }
2332 if (Imm.Mods.Neg) {
2333 Val ^= FpSignMask;
2334 }
2335
2336 return Val;
2337}
2338
2339void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2340 if (isExpr()) {
2342 return;
2343 }
2344
2345 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2346 Inst.getNumOperands())) {
2347 addLiteralImmOperand(Inst, Imm.Val,
2348 ApplyModifiers &
2349 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2350 } else {
2351 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2353 setImmKindNone();
2354 }
2355}
2356
2357void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2358 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2359 auto OpNum = Inst.getNumOperands();
2360 // Check that this operand accepts literals
2361 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2362
2363 if (ApplyModifiers) {
2364 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2365 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2366 Val = applyInputFPModifiers(Val, Size);
2367 }
2368
2369 APInt Literal(64, Val);
2370 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2371
2372 if (Imm.IsFPImm) { // We got fp literal token
2373 switch (OpTy) {
2379 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2380 AsmParser->hasInv2PiInlineImm())) {
2381 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2382 setImmKindConst();
2383 return;
2384 }
2385
2386 // Non-inlineable
2387 if (AMDGPU::isSISrcFPOperand(InstDesc,
2388 OpNum)) { // Expected 64-bit fp operand
2389 bool HasMandatoryLiteral =
2390 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2391 // For fp operands we check if low 32 bits are zeros
2392 if (Literal.getLoBits(32) != 0 &&
2393 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2394 !HasMandatoryLiteral) {
2395 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2396 Inst.getLoc(),
2397 "Can't encode literal as exact 64-bit floating-point operand. "
2398 "Low 32-bits will be set to zero");
2399 Val &= 0xffffffff00000000u;
2400 }
2401
2403 setImmKindLiteral();
2404 return;
2405 }
2406
2407 // We don't allow fp literals in 64-bit integer instructions. It is
2408 // unclear how we should encode them. This case should be checked earlier
2409 // in predicate methods (isLiteralImm())
2410 llvm_unreachable("fp literal in 64-bit integer instruction.");
2411
2414 setImmKindMandatoryLiteral();
2415 return;
2416
2421 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2422 // This is 1/(2*pi), which is going to be truncated to bf16 with a
2423 // loss of precision. The constant represents the idiomatic fp32 value of
2424 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits
2425 // cleared. Prevent the rounding below.
2426 Inst.addOperand(MCOperand::createImm(0x3e22));
2427 setImmKindLiteral();
2428 return;
2429 }
2430 [[fallthrough]];
2431
2452 bool lost;
2453 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2454 // Convert literal to single precision
2455 FPLiteral.convert(*getOpFltSemantics(OpTy),
2456 APFloat::rmNearestTiesToEven, &lost);
2457 // We allow precision loss but not overflow or underflow. This should be
2458 // checked earlier in isLiteralImm()
2459
2460 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2461 Inst.addOperand(MCOperand::createImm(ImmVal));
2462 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2463 setImmKindMandatoryLiteral();
2464 } else {
2465 setImmKindLiteral();
2466 }
2467 return;
2468 }
2469 default:
2470 llvm_unreachable("invalid operand size");
2471 }
2472
2473 return;
2474 }
2475
2476 // We got int literal token.
2477 // Only sign extend inline immediates.
2478 switch (OpTy) {
2491 if (isSafeTruncation(Val, 32) &&
2492 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2493 AsmParser->hasInv2PiInlineImm())) {
2495 setImmKindConst();
2496 return;
2497 }
2498 [[fallthrough]];
2499
2501
2503 setImmKindLiteral();
2504 return;
2505
2508 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2510 setImmKindConst();
2511 return;
2512 }
2513
2514 // When the 32 MSBs are not zero (which effectively means the value can't be
2515 // safely truncated to uint32_t), if the target doesn't support 64-bit literals, or
2516 // the lit modifier is explicitly used, we need to truncate the value to the 32
2517 // LSBs.
2518 if (!AsmParser->has64BitLiterals() || getModifiers().Lit)
2519 Val = Lo_32(Val);
2520
2522 setImmKindLiteral();
2523 return;
2524
2528 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2530 setImmKindConst();
2531 return;
2532 }
2533
2534 // If the target doesn't support 64-bit literals, the constant has to be
2535 // placed in the high 32 bits of a double-precision floating-point value.
2536 if (!AsmParser->has64BitLiterals()) {
2537 Val = static_cast<uint64_t>(Val) << 32;
2538 } else {
2539 // When the target does support 64-bit literals, there are still two cases
2540 // where we want to use the src_literal encoding:
2541 // 1) it is explicitly forced by using the lit modifier;
2542 // 2) the value has a valid 32-bit representation (signed or unsigned)
2543 //    and is not forced by the lit64 modifier.
2544 if (getModifiers().Lit ||
2545 (!getModifiers().Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2546 Val = static_cast<uint64_t>(Val) << 32;
2547 }
2548
2550 setImmKindLiteral();
2551 return;
2552
2555 if (isSafeTruncation(Val, 16) &&
2556 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2558 setImmKindConst();
2559 return;
2560 }
2561
2562 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2563 setImmKindLiteral();
2564 return;
2565
2568 if (isSafeTruncation(Val, 16) &&
2569 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2570 AsmParser->hasInv2PiInlineImm())) {
2572 setImmKindConst();
2573 return;
2574 }
2575
2576 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2577 setImmKindLiteral();
2578 return;
2579
2582 if (isSafeTruncation(Val, 16) &&
2583 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2584 AsmParser->hasInv2PiInlineImm())) {
2586 setImmKindConst();
2587 return;
2588 }
2589
2590 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2591 setImmKindLiteral();
2592 return;
2593
2595 assert(isSafeTruncation(Val, 16));
2596 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2598 return;
2599 }
2601 assert(isSafeTruncation(Val, 16));
2602 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2603 AsmParser->hasInv2PiInlineImm()));
2604
2606 return;
2607 }
2608
2610 assert(isSafeTruncation(Val, 16));
2611 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2612 AsmParser->hasInv2PiInlineImm()));
2613
2615 return;
2616 }
2617
2619 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2620 setImmKindMandatoryLiteral();
2621 return;
2623 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2624 setImmKindMandatoryLiteral();
2625 return;
2627 if ((isInt<32>(Val) || isUInt<32>(Val)) && !getModifiers().Lit64)
2628 Val <<= 32;
2629
2631 setImmKindMandatoryLiteral();
2632 return;
2633 default:
2634 llvm_unreachable("invalid operand size");
2635 }
2636}
2637
2638void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2639 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2640}
2641
2642bool AMDGPUOperand::isInlineValue() const {
2643 return isRegKind() && ::isInlineValue(getReg());
2644}
2645
2646//===----------------------------------------------------------------------===//
2647// AsmParser
2648//===----------------------------------------------------------------------===//
2649
2650void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2651 // TODO: make these pre-defined variables read-only.
2652 // Currently there is no suitable machinery in the core llvm-mc for this.
2653 // MCSymbol::isRedefinable is intended for another purpose, and
2654 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2655 MCContext &Ctx = getContext();
2656 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2658}
2659
2660static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2661 if (Is == IS_VGPR) {
2662 switch (RegWidth) {
2663 default: return -1;
2664 case 32:
2665 return AMDGPU::VGPR_32RegClassID;
2666 case 64:
2667 return AMDGPU::VReg_64RegClassID;
2668 case 96:
2669 return AMDGPU::VReg_96RegClassID;
2670 case 128:
2671 return AMDGPU::VReg_128RegClassID;
2672 case 160:
2673 return AMDGPU::VReg_160RegClassID;
2674 case 192:
2675 return AMDGPU::VReg_192RegClassID;
2676 case 224:
2677 return AMDGPU::VReg_224RegClassID;
2678 case 256:
2679 return AMDGPU::VReg_256RegClassID;
2680 case 288:
2681 return AMDGPU::VReg_288RegClassID;
2682 case 320:
2683 return AMDGPU::VReg_320RegClassID;
2684 case 352:
2685 return AMDGPU::VReg_352RegClassID;
2686 case 384:
2687 return AMDGPU::VReg_384RegClassID;
2688 case 512:
2689 return AMDGPU::VReg_512RegClassID;
2690 case 1024:
2691 return AMDGPU::VReg_1024RegClassID;
2692 }
2693 } else if (Is == IS_TTMP) {
2694 switch (RegWidth) {
2695 default: return -1;
2696 case 32:
2697 return AMDGPU::TTMP_32RegClassID;
2698 case 64:
2699 return AMDGPU::TTMP_64RegClassID;
2700 case 128:
2701 return AMDGPU::TTMP_128RegClassID;
2702 case 256:
2703 return AMDGPU::TTMP_256RegClassID;
2704 case 512:
2705 return AMDGPU::TTMP_512RegClassID;
2706 }
2707 } else if (Is == IS_SGPR) {
2708 switch (RegWidth) {
2709 default: return -1;
2710 case 32:
2711 return AMDGPU::SGPR_32RegClassID;
2712 case 64:
2713 return AMDGPU::SGPR_64RegClassID;
2714 case 96:
2715 return AMDGPU::SGPR_96RegClassID;
2716 case 128:
2717 return AMDGPU::SGPR_128RegClassID;
2718 case 160:
2719 return AMDGPU::SGPR_160RegClassID;
2720 case 192:
2721 return AMDGPU::SGPR_192RegClassID;
2722 case 224:
2723 return AMDGPU::SGPR_224RegClassID;
2724 case 256:
2725 return AMDGPU::SGPR_256RegClassID;
2726 case 288:
2727 return AMDGPU::SGPR_288RegClassID;
2728 case 320:
2729 return AMDGPU::SGPR_320RegClassID;
2730 case 352:
2731 return AMDGPU::SGPR_352RegClassID;
2732 case 384:
2733 return AMDGPU::SGPR_384RegClassID;
2734 case 512:
2735 return AMDGPU::SGPR_512RegClassID;
2736 }
2737 } else if (Is == IS_AGPR) {
2738 switch (RegWidth) {
2739 default: return -1;
2740 case 32:
2741 return AMDGPU::AGPR_32RegClassID;
2742 case 64:
2743 return AMDGPU::AReg_64RegClassID;
2744 case 96:
2745 return AMDGPU::AReg_96RegClassID;
2746 case 128:
2747 return AMDGPU::AReg_128RegClassID;
2748 case 160:
2749 return AMDGPU::AReg_160RegClassID;
2750 case 192:
2751 return AMDGPU::AReg_192RegClassID;
2752 case 224:
2753 return AMDGPU::AReg_224RegClassID;
2754 case 256:
2755 return AMDGPU::AReg_256RegClassID;
2756 case 288:
2757 return AMDGPU::AReg_288RegClassID;
2758 case 320:
2759 return AMDGPU::AReg_320RegClassID;
2760 case 352:
2761 return AMDGPU::AReg_352RegClassID;
2762 case 384:
2763 return AMDGPU::AReg_384RegClassID;
2764 case 512:
2765 return AMDGPU::AReg_512RegClassID;
2766 case 1024:
2767 return AMDGPU::AReg_1024RegClassID;
2768 }
2769 }
2770 return -1;
2771}
2772
2775 .Case("exec", AMDGPU::EXEC)
2776 .Case("vcc", AMDGPU::VCC)
2777 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2778 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2779 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2780 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2781 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2782 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2783 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2784 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2785 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2786 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2787 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2788 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2789 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2790 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2791 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2792 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2793 .Case("m0", AMDGPU::M0)
2794 .Case("vccz", AMDGPU::SRC_VCCZ)
2795 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2796 .Case("execz", AMDGPU::SRC_EXECZ)
2797 .Case("src_execz", AMDGPU::SRC_EXECZ)
2798 .Case("scc", AMDGPU::SRC_SCC)
2799 .Case("src_scc", AMDGPU::SRC_SCC)
2800 .Case("tba", AMDGPU::TBA)
2801 .Case("tma", AMDGPU::TMA)
2802 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2803 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2804 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2805 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2806 .Case("vcc_lo", AMDGPU::VCC_LO)
2807 .Case("vcc_hi", AMDGPU::VCC_HI)
2808 .Case("exec_lo", AMDGPU::EXEC_LO)
2809 .Case("exec_hi", AMDGPU::EXEC_HI)
2810 .Case("tma_lo", AMDGPU::TMA_LO)
2811 .Case("tma_hi", AMDGPU::TMA_HI)
2812 .Case("tba_lo", AMDGPU::TBA_LO)
2813 .Case("tba_hi", AMDGPU::TBA_HI)
2814 .Case("pc", AMDGPU::PC_REG)
2815 .Case("null", AMDGPU::SGPR_NULL)
2816 .Default(AMDGPU::NoRegister);
2817}
2818
2819bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2820 SMLoc &EndLoc, bool RestoreOnFailure) {
2821 auto R = parseRegister();
2822 if (!R) return true;
2823 assert(R->isReg());
2824 RegNo = R->getReg();
2825 StartLoc = R->getStartLoc();
2826 EndLoc = R->getEndLoc();
2827 return false;
2828}
2829
2830bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2831 SMLoc &EndLoc) {
2832 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2833}
2834
2835ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2836 SMLoc &EndLoc) {
2837 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2838 bool PendingErrors = getParser().hasPendingError();
2839 getParser().clearPendingErrors();
2840 if (PendingErrors)
2841 return ParseStatus::Failure;
2842 if (Result)
2843 return ParseStatus::NoMatch;
2844 return ParseStatus::Success;
2845}
2846
2847bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2848 RegisterKind RegKind,
2849 MCRegister Reg1, SMLoc Loc) {
2850 switch (RegKind) {
2851 case IS_SPECIAL:
2852 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2853 Reg = AMDGPU::EXEC;
2854 RegWidth = 64;
2855 return true;
2856 }
2857 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2858 Reg = AMDGPU::FLAT_SCR;
2859 RegWidth = 64;
2860 return true;
2861 }
2862 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2863 Reg = AMDGPU::XNACK_MASK;
2864 RegWidth = 64;
2865 return true;
2866 }
2867 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2868 Reg = AMDGPU::VCC;
2869 RegWidth = 64;
2870 return true;
2871 }
2872 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2873 Reg = AMDGPU::TBA;
2874 RegWidth = 64;
2875 return true;
2876 }
2877 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2878 Reg = AMDGPU::TMA;
2879 RegWidth = 64;
2880 return true;
2881 }
2882 Error(Loc, "register does not fit in the list");
2883 return false;
2884 case IS_VGPR:
2885 case IS_SGPR:
2886 case IS_AGPR:
2887 case IS_TTMP:
2888 if (Reg1 != Reg + RegWidth / 32) {
2889 Error(Loc, "registers in a list must have consecutive indices");
2890 return false;
2891 }
2892 RegWidth += 32;
2893 return true;
2894 default:
2895 llvm_unreachable("unexpected register kind");
2896 }
2897}
2898
2899struct RegInfo {
2900   StringLiteral Name;
2901   RegisterKind Kind;
2902};
2903
2904static constexpr RegInfo RegularRegisters[] = {
2905 {{"v"}, IS_VGPR},
2906 {{"s"}, IS_SGPR},
2907 {{"ttmp"}, IS_TTMP},
2908 {{"acc"}, IS_AGPR},
2909 {{"a"}, IS_AGPR},
2910};
2911
2912static bool isRegularReg(RegisterKind Kind) {
2913 return Kind == IS_VGPR ||
2914 Kind == IS_SGPR ||
2915 Kind == IS_TTMP ||
2916 Kind == IS_AGPR;
2917}
2918
2919 static const RegInfo *getRegularRegInfo(StringRef Str) {
2920   for (const RegInfo &Reg : RegularRegisters)
2921 if (Str.starts_with(Reg.Name))
2922 return &Reg;
2923 return nullptr;
2924}
2925
2926static bool getRegNum(StringRef Str, unsigned& Num) {
2927 return !Str.getAsInteger(10, Num);
2928}
2929
2930bool
2931AMDGPUAsmParser::isRegister(const AsmToken &Token,
2932 const AsmToken &NextToken) const {
2933
2934 // A list of consecutive registers: [s0,s1,s2,s3]
2935 if (Token.is(AsmToken::LBrac))
2936 return true;
2937
2938 if (!Token.is(AsmToken::Identifier))
2939 return false;
2940
2941 // A single register like s0 or a range of registers like s[0:1]
2942
2943 StringRef Str = Token.getString();
2944 const RegInfo *Reg = getRegularRegInfo(Str);
2945 if (Reg) {
2946 StringRef RegName = Reg->Name;
2947 StringRef RegSuffix = Str.substr(RegName.size());
2948 if (!RegSuffix.empty()) {
2949 RegSuffix.consume_back(".l");
2950 RegSuffix.consume_back(".h");
2951 unsigned Num;
2952 // A single register with an index: rXX
2953 if (getRegNum(RegSuffix, Num))
2954 return true;
2955 } else {
2956 // A range of registers: r[XX:YY].
2957 if (NextToken.is(AsmToken::LBrac))
2958 return true;
2959 }
2960 }
2961
2962 return getSpecialRegForName(Str).isValid();
2963}
2964
2965bool
2966AMDGPUAsmParser::isRegister()
2967{
2968 return isRegister(getToken(), peekToken());
2969}
2970
2971MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2972 unsigned SubReg, unsigned RegWidth,
2973 SMLoc Loc) {
2974 assert(isRegularReg(RegKind));
2975
2976 unsigned AlignSize = 1;
2977 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2978 // SGPR and TTMP registers must be aligned.
2979 // Max required alignment is 4 dwords.
2980 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2981 }
2982
2983 if (RegNum % AlignSize != 0) {
2984 Error(Loc, "invalid register alignment");
2985 return MCRegister();
2986 }
2987
2988 unsigned RegIdx = RegNum / AlignSize;
2989 int RCID = getRegClass(RegKind, RegWidth);
2990 if (RCID == -1) {
2991 Error(Loc, "invalid or unsupported register size");
2992 return MCRegister();
2993 }
2994
2995 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2996 const MCRegisterClass RC = TRI->getRegClass(RCID);
2997 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2998 Error(Loc, "register index is out of range");
2999 return AMDGPU::NoRegister;
3000 }
3001
3002 if (RegKind == IS_VGPR && !isGFX1250() && RegIdx + RegWidth / 32 > 256) {
3003 Error(Loc, "register index is out of range");
3004 return MCRegister();
3005 }
3006
3007 MCRegister Reg = RC.getRegister(RegIdx);
3008
3009 if (SubReg) {
3010 Reg = TRI->getSubReg(Reg, SubReg);
3011
3012 // Currently all regular registers have their .l and .h subregisters, so
3013 // we should never need to generate an error here.
3014 assert(Reg && "Invalid subregister!");
3015 }
3016
3017 return Reg;
3018}
3019
3020bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
3021 unsigned &SubReg) {
3022 int64_t RegLo, RegHi;
3023 if (!skipToken(AsmToken::LBrac, "missing register index"))
3024 return false;
3025
3026 SMLoc FirstIdxLoc = getLoc();
3027 SMLoc SecondIdxLoc;
3028
3029 if (!parseExpr(RegLo))
3030 return false;
3031
3032 if (trySkipToken(AsmToken::Colon)) {
3033 SecondIdxLoc = getLoc();
3034 if (!parseExpr(RegHi))
3035 return false;
3036 } else {
3037 RegHi = RegLo;
3038 }
3039
3040 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
3041 return false;
3042
3043 if (!isUInt<32>(RegLo)) {
3044 Error(FirstIdxLoc, "invalid register index");
3045 return false;
3046 }
3047
3048 if (!isUInt<32>(RegHi)) {
3049 Error(SecondIdxLoc, "invalid register index");
3050 return false;
3051 }
3052
3053 if (RegLo > RegHi) {
3054 Error(FirstIdxLoc, "first register index should not exceed second index");
3055 return false;
3056 }
3057
3058 if (RegHi == RegLo) {
3059 StringRef RegSuffix = getTokenStr();
3060 if (RegSuffix == ".l") {
3061 SubReg = AMDGPU::lo16;
3062 lex();
3063 } else if (RegSuffix == ".h") {
3064 SubReg = AMDGPU::hi16;
3065 lex();
3066 }
3067 }
3068
3069 Num = static_cast<unsigned>(RegLo);
3070 RegWidth = 32 * ((RegHi - RegLo) + 1);
3071
3072 return true;
3073}
3074
3075MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3076 unsigned &RegNum,
3077 unsigned &RegWidth,
3078 SmallVectorImpl<AsmToken> &Tokens) {
3079 assert(isToken(AsmToken::Identifier));
3080 MCRegister Reg = getSpecialRegForName(getTokenStr());
3081 if (Reg) {
3082 RegNum = 0;
3083 RegWidth = 32;
3084 RegKind = IS_SPECIAL;
3085 Tokens.push_back(getToken());
3086 lex(); // skip register name
3087 }
3088 return Reg;
3089}
3090
3091MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3092 unsigned &RegNum,
3093 unsigned &RegWidth,
3094 SmallVectorImpl<AsmToken> &Tokens) {
3095 assert(isToken(AsmToken::Identifier));
3096 StringRef RegName = getTokenStr();
3097 auto Loc = getLoc();
3098
3099 const RegInfo *RI = getRegularRegInfo(RegName);
3100 if (!RI) {
3101 Error(Loc, "invalid register name");
3102 return MCRegister();
3103 }
3104
3105 Tokens.push_back(getToken());
3106 lex(); // skip register name
3107
3108 RegKind = RI->Kind;
3109 StringRef RegSuffix = RegName.substr(RI->Name.size());
3110 unsigned SubReg = NoSubRegister;
3111 if (!RegSuffix.empty()) {
3112 if (RegSuffix.consume_back(".l"))
3113 SubReg = AMDGPU::lo16;
3114 else if (RegSuffix.consume_back(".h"))
3115 SubReg = AMDGPU::hi16;
3116
3117 // Single 32-bit register: vXX.
3118 if (!getRegNum(RegSuffix, RegNum)) {
3119 Error(Loc, "invalid register index");
3120 return MCRegister();
3121 }
3122 RegWidth = 32;
3123 } else {
3124 // Range of registers: v[XX:YY]. ":YY" is optional.
3125 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3126 return MCRegister();
3127 }
3128
3129 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3130}
3131
3132MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3133 unsigned &RegNum, unsigned &RegWidth,
3134 SmallVectorImpl<AsmToken> &Tokens) {
3135 MCRegister Reg;
3136 auto ListLoc = getLoc();
3137
3138 if (!skipToken(AsmToken::LBrac,
3139 "expected a register or a list of registers")) {
3140 return MCRegister();
3141 }
3142
3143 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3144
3145 auto Loc = getLoc();
3146 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3147 return MCRegister();
3148 if (RegWidth != 32) {
3149 Error(Loc, "expected a single 32-bit register");
3150 return MCRegister();
3151 }
3152
3153 for (; trySkipToken(AsmToken::Comma); ) {
3154 RegisterKind NextRegKind;
3155 MCRegister NextReg;
3156 unsigned NextRegNum, NextRegWidth;
3157 Loc = getLoc();
3158
3159 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3160 NextRegNum, NextRegWidth,
3161 Tokens)) {
3162 return MCRegister();
3163 }
3164 if (NextRegWidth != 32) {
3165 Error(Loc, "expected a single 32-bit register");
3166 return MCRegister();
3167 }
3168 if (NextRegKind != RegKind) {
3169 Error(Loc, "registers in a list must be of the same kind");
3170 return MCRegister();
3171 }
3172 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3173 return MCRegister();
3174 }
3175
3176 if (!skipToken(AsmToken::RBrac,
3177 "expected a comma or a closing square bracket")) {
3178 return MCRegister();
3179 }
3180
3181 if (isRegularReg(RegKind))
3182 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3183
3184 return Reg;
3185}
3186
3187bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3188 MCRegister &Reg, unsigned &RegNum,
3189 unsigned &RegWidth,
3190 SmallVectorImpl<AsmToken> &Tokens) {
3191 auto Loc = getLoc();
3192 Reg = MCRegister();
3193
3194 if (isToken(AsmToken::Identifier)) {
3195 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3196 if (!Reg)
3197 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3198 } else {
3199 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3200 }
3201
3202 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3203 if (!Reg) {
3204 assert(Parser.hasPendingError());
3205 return false;
3206 }
3207
3208 if (!subtargetHasRegister(*TRI, Reg)) {
3209 if (Reg == AMDGPU::SGPR_NULL) {
3210 Error(Loc, "'null' operand is not supported on this GPU");
3211 } else {
3213 " register not available on this GPU");
3214 }
3215 return false;
3216 }
3217
3218 return true;
3219}
3220
3221bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3222 MCRegister &Reg, unsigned &RegNum,
3223 unsigned &RegWidth,
3224 bool RestoreOnFailure /*=false*/) {
3225 Reg = MCRegister();
3226
3228 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3229 if (RestoreOnFailure) {
3230 while (!Tokens.empty()) {
3231 getLexer().UnLex(Tokens.pop_back_val());
3232 }
3233 }
3234 return true;
3235 }
3236 return false;
3237}
3238
3239std::optional<StringRef>
3240AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3241 switch (RegKind) {
3242 case IS_VGPR:
3243 return StringRef(".amdgcn.next_free_vgpr");
3244 case IS_SGPR:
3245 return StringRef(".amdgcn.next_free_sgpr");
3246 default:
3247 return std::nullopt;
3248 }
3249}
3250
3251void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3252 auto SymbolName = getGprCountSymbolName(RegKind);
3253 assert(SymbolName && "initializing invalid register kind");
3254 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3256 Sym->setRedefinable(true);
3257}
3258
3259bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3260 unsigned DwordRegIndex,
3261 unsigned RegWidth) {
3262 // Symbols are only defined for GCN targets
3263 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3264 return true;
3265
3266 auto SymbolName = getGprCountSymbolName(RegKind);
3267 if (!SymbolName)
3268 return true;
3269 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3270
3271 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3272 int64_t OldCount;
3273
3274 if (!Sym->isVariable())
3275 return !Error(getLoc(),
3276 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3277 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3278 return !Error(
3279 getLoc(),
3280 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3281
3282   if (OldCount <= NewMax)
3283     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3284
3285 return true;
3286}
3287
3288std::unique_ptr<AMDGPUOperand>
3289AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3290 const auto &Tok = getToken();
3291 SMLoc StartLoc = Tok.getLoc();
3292 SMLoc EndLoc = Tok.getEndLoc();
3293 RegisterKind RegKind;
3294 MCRegister Reg;
3295 unsigned RegNum, RegWidth;
3296
3297 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3298 return nullptr;
3299 }
3300 if (isHsaAbi(getSTI())) {
3301 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3302 return nullptr;
3303 } else
3304 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3305 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3306}
3307
3308ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3309 bool HasSP3AbsModifier, bool HasLit,
3310 bool HasLit64) {
3311 // TODO: add syntactic sugar for 1/(2*PI)
3312
3313 if (isRegister() || isModifier())
3314 return ParseStatus::NoMatch;
3315
3316 if (!HasLit && !HasLit64) {
3317 HasLit64 = trySkipId("lit64");
3318 HasLit = !HasLit64 && trySkipId("lit");
3319 if (HasLit || HasLit64) {
3320 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3321 return ParseStatus::Failure;
3322 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit, HasLit64);
3323 if (S.isSuccess() &&
3324 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3325 return ParseStatus::Failure;
3326 return S;
3327 }
3328 }
3329
3330 const auto& Tok = getToken();
3331 const auto& NextTok = peekToken();
3332 bool IsReal = Tok.is(AsmToken::Real);
3333 SMLoc S = getLoc();
3334 bool Negate = false;
3335
3336 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3337 lex();
3338 IsReal = true;
3339 Negate = true;
3340 }
3341
3342 AMDGPUOperand::Modifiers Mods;
3343 Mods.Lit = HasLit;
3344 Mods.Lit64 = HasLit64;
3345
3346 if (IsReal) {
3347 // Floating-point expressions are not supported.
3348 // Can only allow floating-point literals with an
3349 // optional sign.
3350
3351 StringRef Num = getTokenStr();
3352 lex();
3353
3354 APFloat RealVal(APFloat::IEEEdouble());
3355 auto roundMode = APFloat::rmNearestTiesToEven;
3356 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3357 return ParseStatus::Failure;
3358 if (Negate)
3359 RealVal.changeSign();
3360
3361 Operands.push_back(
3362 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3363 AMDGPUOperand::ImmTyNone, true));
3364 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3365 Op.setModifiers(Mods);
3366
3367 return ParseStatus::Success;
3368
3369 } else {
3370 int64_t IntVal;
3371 const MCExpr *Expr;
3372 SMLoc S = getLoc();
3373
3374 if (HasSP3AbsModifier) {
3375 // This is a workaround for handling expressions
3376 // as arguments of SP3 'abs' modifier, for example:
3377 // |1.0|
3378 // |-1|
3379 // |1+x|
3380 // This syntax is not compatible with syntax of standard
3381 // MC expressions (due to the trailing '|').
3382 SMLoc EndLoc;
3383 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3384 return ParseStatus::Failure;
3385 } else {
3386 if (Parser.parseExpression(Expr))
3387 return ParseStatus::Failure;
3388 }
3389
3390 if (Expr->evaluateAsAbsolute(IntVal)) {
3391 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3392 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3393 Op.setModifiers(Mods);
3394 } else {
3395 if (HasLit || HasLit64)
3396 return ParseStatus::NoMatch;
3397 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3398 }
3399
3400 return ParseStatus::Success;
3401 }
3402
3403 return ParseStatus::NoMatch;
3404}
3405
3406ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3407 if (!isRegister())
3408 return ParseStatus::NoMatch;
3409
3410 if (auto R = parseRegister()) {
3411 assert(R->isReg());
3412 Operands.push_back(std::move(R));
3413 return ParseStatus::Success;
3414 }
3415 return ParseStatus::Failure;
3416}
3417
3418ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3419 bool HasSP3AbsMod, bool HasLit,
3420 bool HasLit64) {
3421 ParseStatus Res = parseReg(Operands);
3422 if (!Res.isNoMatch())
3423 return Res;
3424 if (isModifier())
3425 return ParseStatus::NoMatch;
3426 return parseImm(Operands, HasSP3AbsMod, HasLit, HasLit64);
3427}
3428
3429bool
3430AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3431 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3432 const auto &str = Token.getString();
3433 return str == "abs" || str == "neg" || str == "sext";
3434 }
3435 return false;
3436}
3437
3438bool
3439AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3440 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3441}
3442
3443bool
3444AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3445 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3446}
3447
3448bool
3449AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3450 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3451}
3452
3453// Check if this is an operand modifier or an opcode modifier
3454 // which may look like an expression but is not. We should
3455// avoid parsing these modifiers as expressions. Currently
3456// recognized sequences are:
3457// |...|
3458// abs(...)
3459// neg(...)
3460// sext(...)
3461// -reg
3462// -|...|
3463// -abs(...)
3464// name:...
3465//
3466bool
3467AMDGPUAsmParser::isModifier() {
3468
3469 AsmToken Tok = getToken();
3470 AsmToken NextToken[2];
3471 peekTokens(NextToken);
3472
3473 return isOperandModifier(Tok, NextToken[0]) ||
3474 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3475 isOpcodeModifierWithVal(Tok, NextToken[0]);
3476}
3477
3478// Check if the current token is an SP3 'neg' modifier.
3479// Currently this modifier is allowed in the following context:
3480//
3481// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3482// 2. Before an 'abs' modifier: -abs(...)
3483// 3. Before an SP3 'abs' modifier: -|...|
3484//
3485// In all other cases "-" is handled as a part
3486// of an expression that follows the sign.
3487//
3488 // Note: When "-" is followed by an integer literal N,
3489 // this is interpreted as integer negation rather
3490 // than a floating-point NEG modifier applied to N.
3491 // Besides being counter-intuitive, such use of the floating-point
3492 // NEG modifier would have resulted in integer literals having
3493 // different meanings with VOP1/2/C and VOP3,
3494// for example:
3495// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3496// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3497// Negative fp literals with preceding "-" are
3498// handled likewise for uniformity
3499//
3500bool
3501AMDGPUAsmParser::parseSP3NegModifier() {
3502
3503 AsmToken NextToken[2];
3504 peekTokens(NextToken);
3505
3506 if (isToken(AsmToken::Minus) &&
3507 (isRegister(NextToken[0], NextToken[1]) ||
3508 NextToken[0].is(AsmToken::Pipe) ||
3509 isId(NextToken[0], "abs"))) {
3510 lex();
3511 return true;
3512 }
3513
3514 return false;
3515}
3516
3517ParseStatus
3518AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3519 bool AllowImm) {
3520 bool Neg, SP3Neg;
3521 bool Abs, SP3Abs;
3522 bool Lit64, Lit;
3523 SMLoc Loc;
3524
3525 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3526 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3527 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3528
3529 SP3Neg = parseSP3NegModifier();
3530
3531 Loc = getLoc();
3532 Neg = trySkipId("neg");
3533 if (Neg && SP3Neg)
3534 return Error(Loc, "expected register or immediate");
3535 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3536 return ParseStatus::Failure;
3537
3538 Abs = trySkipId("abs");
3539 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3540 return ParseStatus::Failure;
3541
3542 Lit64 = trySkipId("lit64");
3543 if (Lit64) {
3544 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3545 return ParseStatus::Failure;
3546 if (!has64BitLiterals())
3547 return Error(Loc, "lit64 is not supported on this GPU");
3548 }
3549
3550 Lit = !Lit64 && trySkipId("lit");
3551 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3552 return ParseStatus::Failure;
3553
3554 Loc = getLoc();
3555 SP3Abs = trySkipToken(AsmToken::Pipe);
3556 if (Abs && SP3Abs)
3557 return Error(Loc, "expected register or immediate");
3558
3559 ParseStatus Res;
3560 if (AllowImm) {
3561 Res = parseRegOrImm(Operands, SP3Abs, Lit, Lit64);
3562 } else {
3563 Res = parseReg(Operands);
3564 }
3565 if (!Res.isSuccess())
3566     return (SP3Neg || Neg || SP3Abs || Abs || Lit || Lit64)
3567                ? ParseStatus::Failure
3568                : Res;
3569
3570 if ((Lit || Lit64) && !Operands.back()->isImm())
3571 Error(Loc, "expected immediate with lit modifier");
3572
3573 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3574 return ParseStatus::Failure;
3575 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3576 return ParseStatus::Failure;
3577 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3578 return ParseStatus::Failure;
3579 if ((Lit || Lit64) &&
3580 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3581 return ParseStatus::Failure;
3582
3583 AMDGPUOperand::Modifiers Mods;
3584 Mods.Abs = Abs || SP3Abs;
3585 Mods.Neg = Neg || SP3Neg;
3586 Mods.Lit = Lit;
3587 Mods.Lit64 = Lit64;
3588
3589 if (Mods.hasFPModifiers() || Lit || Lit64) {
3590 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3591 if (Op.isExpr())
3592 return Error(Op.getStartLoc(), "expected an absolute expression");
3593 Op.setModifiers(Mods);
3594 }
3595 return ParseStatus::Success;
3596}
3597
3598ParseStatus
3599AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3600 bool AllowImm) {
3601 bool Sext = trySkipId("sext");
3602 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3603 return ParseStatus::Failure;
3604
3605 ParseStatus Res;
3606 if (AllowImm) {
3607 Res = parseRegOrImm(Operands);
3608 } else {
3609 Res = parseReg(Operands);
3610 }
3611 if (!Res.isSuccess())
3612 return Sext ? ParseStatus::Failure : Res;
3613
3614 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3615 return ParseStatus::Failure;
3616
3617 AMDGPUOperand::Modifiers Mods;
3618 Mods.Sext = Sext;
3619
3620 if (Mods.hasIntModifiers()) {
3621 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3622 if (Op.isExpr())
3623 return Error(Op.getStartLoc(), "expected an absolute expression");
3624 Op.setModifiers(Mods);
3625 }
3626
3627 return ParseStatus::Success;
3628}
3629
3630ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3631 return parseRegOrImmWithFPInputMods(Operands, false);
3632}
3633
3634ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3635 return parseRegOrImmWithIntInputMods(Operands, false);
3636}
3637
3638ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3639 auto Loc = getLoc();
3640 if (trySkipId("off")) {
3641 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3642 AMDGPUOperand::ImmTyOff, false));
3643 return ParseStatus::Success;
3644 }
3645
3646 if (!isRegister())
3647 return ParseStatus::NoMatch;
3648
3649 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3650 if (Reg) {
3651 Operands.push_back(std::move(Reg));
3652 return ParseStatus::Success;
3653 }
3654
3655 return ParseStatus::Failure;
3656}
3657
3658unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3659 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3660
3661 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3662 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3663 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3664 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3665 return Match_InvalidOperand;
3666
3667 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3668 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3669 // v_mac_f32/16 allow only dst_sel == DWORD;
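// e.g. "dst_sel:BYTE_0" on v_mac_f16_sdwa is rejected via Match_InvalidOperand
// below; only dst_sel:DWORD is accepted for these opcodes.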
3670 auto OpNum =
3671 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3672 const auto &Op = Inst.getOperand(OpNum);
3673 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3674 return Match_InvalidOperand;
3675 }
3676 }
3677
3678 // Asm can first try to match VOPD or VOPD3. By failing early here with
3679 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3680 // Checking later during validateInstruction does not give a chance to retry
3681 // parsing as a different encoding.
3682 if (tryAnotherVOPDEncoding(Inst))
3683 return Match_InvalidOperand;
3684
3685 return Match_Success;
3686}
3687
3697
3698// What asm variants we should check
3699ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3700 if (isForcedDPP() && isForcedVOP3()) {
3701 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3702 return ArrayRef(Variants);
3703 }
3704 if (getForcedEncodingSize() == 32) {
3705 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3706 return ArrayRef(Variants);
3707 }
3708
3709 if (isForcedVOP3()) {
3710 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3711 return ArrayRef(Variants);
3712 }
3713
3714 if (isForcedSDWA()) {
3715 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3716 AMDGPUAsmVariants::SDWA9};
3717 return ArrayRef(Variants);
3718 }
3719
3720 if (isForcedDPP()) {
3721 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3722 return ArrayRef(Variants);
3723 }
3724
3725 return getAllVariants();
3726}
3727
3728StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3729 if (isForcedDPP() && isForcedVOP3())
3730 return "e64_dpp";
3731
3732 if (getForcedEncodingSize() == 32)
3733 return "e32";
3734
3735 if (isForcedVOP3())
3736 return "e64";
3737
3738 if (isForcedSDWA())
3739 return "sdwa";
3740
3741 if (isForcedDPP())
3742 return "dpp";
3743
3744 return "";
3745}
3746
3747unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3748 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3749 for (MCPhysReg Reg : Desc.implicit_uses()) {
3750 switch (Reg) {
3751 case AMDGPU::FLAT_SCR:
3752 case AMDGPU::VCC:
3753 case AMDGPU::VCC_LO:
3754 case AMDGPU::VCC_HI:
3755 case AMDGPU::M0:
3756 return Reg;
3757 default:
3758 break;
3759 }
3760 }
3761 return AMDGPU::NoRegister;
3762}
3763
3764// NB: This code is correct only when used to check constant
3765 // bus limitations because GFX7 supports no f16 inline constants.
3766// Note that there are no cases when a GFX7 opcode violates
3767// constant bus limitations due to the use of an f16 constant.
3768bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3769 unsigned OpIdx) const {
3770 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3771
3774 return false;
3775 }
3776
3777 const MCOperand &MO = Inst.getOperand(OpIdx);
3778
3779 int64_t Val = MO.getImm();
3780 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3781
3782 switch (OpSize) { // expected operand size
3783 case 8:
3784 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3785 case 4:
3786 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3787 case 2: {
3788 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3791 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3792
3796
3800
3804
3807 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3808
3811 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3812
3814 return false;
3815
3816 llvm_unreachable("invalid operand type");
3817 }
3818 default:
3819 llvm_unreachable("invalid operand size");
3820 }
3821}
3822
3823unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3824 if (!isGFX10Plus())
3825 return 1;
3826
3827 switch (Opcode) {
3828 // 64-bit shift instructions can use only one scalar value input
3829 case AMDGPU::V_LSHLREV_B64_e64:
3830 case AMDGPU::V_LSHLREV_B64_gfx10:
3831 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3832 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3833 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3834 case AMDGPU::V_LSHRREV_B64_e64:
3835 case AMDGPU::V_LSHRREV_B64_gfx10:
3836 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3837 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3838 case AMDGPU::V_ASHRREV_I64_e64:
3839 case AMDGPU::V_ASHRREV_I64_gfx10:
3840 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3841 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3842 case AMDGPU::V_LSHL_B64_e64:
3843 case AMDGPU::V_LSHR_B64_e64:
3844 case AMDGPU::V_ASHR_I64_e64:
3845 return 1;
3846 default:
3847 return 2;
3848 }
3849}
3850
3851 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3852 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3853
3854// Get regular operand indices in the same order as specified
3855// in the instruction (but append mandatory literals to the end).
3856 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3857 bool AddMandatoryLiterals = false) {
3858
3859 int16_t ImmIdx =
3860 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3861
3862 if (isVOPD(Opcode)) {
3863 int16_t ImmXIdx =
3864 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3865
3866 return {getNamedOperandIdx(Opcode, OpName::src0X),
3867 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3868 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3869 getNamedOperandIdx(Opcode, OpName::src0Y),
3870 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3871 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3872 ImmXIdx,
3873 ImmIdx};
3874 }
3875
3876 return {getNamedOperandIdx(Opcode, OpName::src0),
3877 getNamedOperandIdx(Opcode, OpName::src1),
3878 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3879}
3880
3881bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3882 const MCOperand &MO = Inst.getOperand(OpIdx);
3883 if (MO.isImm())
3884 return !isInlineConstant(Inst, OpIdx);
3885 if (MO.isReg()) {
3886 auto Reg = MO.getReg();
3887 if (!Reg)
3888 return false;
3889 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3890 auto PReg = mc2PseudoReg(Reg);
3891 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3892 }
3893 return true;
3894}
3895
3896// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3897// Writelane is special in that it can use SGPR and M0 (which would normally
3898// count as using the constant bus twice - but in this case it is allowed since
3899// the lane selector doesn't count as a use of the constant bus). However, it is
3900// still required to abide by the 1 SGPR rule.
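// e.g. "v_writelane_b32 v1, s2, m0" reads both s2 and m0, but only s2 counts
// toward the single allowed SGPR use.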
3901static bool checkWriteLane(const MCInst &Inst) {
3902 const unsigned Opcode = Inst.getOpcode();
3903 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3904 return false;
3905 const MCOperand &LaneSelOp = Inst.getOperand(2);
3906 if (!LaneSelOp.isReg())
3907 return false;
3908 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3909 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3910}
3911
3912bool AMDGPUAsmParser::validateConstantBusLimitations(
3913 const MCInst &Inst, const OperandVector &Operands) {
3914 const unsigned Opcode = Inst.getOpcode();
3915 const MCInstrDesc &Desc = MII.get(Opcode);
3916 MCRegister LastSGPR;
3917 unsigned ConstantBusUseCount = 0;
3918 unsigned NumLiterals = 0;
3919 unsigned LiteralSize;
3920
3921 if (!(Desc.TSFlags &
3924 !isVOPD(Opcode))
3925 return true;
3926
3927 if (checkWriteLane(Inst))
3928 return true;
3929
3930 // Check special imm operands (used by madmk, etc)
3931 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3932 ++NumLiterals;
3933 LiteralSize = 4;
3934 }
3935
3936 SmallDenseSet<unsigned> SGPRsUsed;
3937 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3938 if (SGPRUsed != AMDGPU::NoRegister) {
3939 SGPRsUsed.insert(SGPRUsed);
3940 ++ConstantBusUseCount;
3941 }
3942
3943 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3944
3945 for (int OpIdx : OpIndices) {
3946 if (OpIdx == -1)
3947 continue;
3948
3949 const MCOperand &MO = Inst.getOperand(OpIdx);
3950 if (usesConstantBus(Inst, OpIdx)) {
3951 if (MO.isReg()) {
3952 LastSGPR = mc2PseudoReg(MO.getReg());
3953 // Pairs of registers with partial intersections like these
3954 // s0, s[0:1]
3955 // flat_scratch_lo, flat_scratch
3956 // flat_scratch_lo, flat_scratch_hi
3957 // are theoretically valid but they are disabled anyway.
3958 // Note that this code mimics SIInstrInfo::verifyInstruction
3959 if (SGPRsUsed.insert(LastSGPR).second) {
3960 ++ConstantBusUseCount;
3961 }
3962 } else { // Expression or a literal
3963
3964 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3965 continue; // special operand like VINTERP attr_chan
3966
3967 // An instruction may use only one literal.
3968 // This has been validated on the previous step.
3969 // See validateVOPLiteral.
3970 // This literal may be used as more than one operand.
3971 // If all these operands are of the same size,
3972 // this literal counts as one scalar value.
3973 // Otherwise it counts as 2 scalar values.
3974 // See "GFX10 Shader Programming", section 3.6.2.3.
3975
3976 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3977 if (Size < 4)
3978 Size = 4;
3979
3980 if (NumLiterals == 0) {
3981 NumLiterals = 1;
3982 LiteralSize = Size;
3983 } else if (LiteralSize != Size) {
3984 NumLiterals = 2;
3985 }
3986 }
3987 }
3988 }
3989 ConstantBusUseCount += NumLiterals;
3990
3991 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3992 return true;
3993
3994 SMLoc LitLoc = getLitLoc(Operands);
3995 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3996 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3997 Error(Loc, "invalid operand (violates constant bus restrictions)");
3998 return false;
3999}
4000
4001std::optional<unsigned>
4002AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
4003
4004 const unsigned Opcode = Inst.getOpcode();
4005 if (!isVOPD(Opcode))
4006 return {};
4007
4008 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4009
4010 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
4011 const MCOperand &Opr = Inst.getOperand(OperandIdx);
4012 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
4013 ? Opr.getReg()
4014 : MCRegister();
4015 };
4016
4017 // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
4018 // source-cache.
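// Hence the SkipSrc flag below exempts those dual-MOV opcodes from part of the
// register bank check.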
4019 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
4020 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
4021 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
4022 bool AllowSameVGPR = isGFX1250();
4023
4024 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
4025 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
4026 int I = getNamedOperandIdx(Opcode, OpName);
4027 const MCOperand &Op = Inst.getOperand(I);
4028 if (!Op.isImm())
4029 continue;
4030 int64_t Imm = Op.getImm();
4031 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
4032 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
4033 return (unsigned)I;
4034 }
4035
4036 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
4037 OpName::vsrc2Y, OpName::imm}) {
4038 int I = getNamedOperandIdx(Opcode, OpName);
4039 if (I == -1)
4040 continue;
4041 const MCOperand &Op = Inst.getOperand(I);
4042 if (Op.isImm())
4043 return (unsigned)I;
4044 }
4045 }
4046
4047 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4048 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4049 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
4050
4051 return InvalidCompOprIdx;
4052}
4053
4054bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
4055 const OperandVector &Operands) {
4056
4057 unsigned Opcode = Inst.getOpcode();
4058 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
4059
4060 if (AsVOPD3) {
4061 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4062 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
4063 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4064 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
4065 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
4066 }
4067 }
4068
4069 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4070 if (!InvalidCompOprIdx.has_value())
4071 return true;
4072
4073 auto CompOprIdx = *InvalidCompOprIdx;
4074 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4075 auto ParsedIdx =
4076 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
4077 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4078 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4079
4080 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4081 if (CompOprIdx == VOPD::Component::DST) {
4082 if (AsVOPD3)
4083 Error(Loc, "dst registers must be distinct");
4084 else
4085 Error(Loc, "one dst register must be even and the other odd");
4086 } else {
4087 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4088 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4089 " operands must use different VGPR banks");
4090 }
4091
4092 return false;
4093}
4094
4095// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4096// potentially used as VOPD3 with the same operands.
4097bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4098 // First check if it fits VOPD
4099 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4100 if (!InvalidCompOprIdx.has_value())
4101 return false;
4102
4103 // Then if it fits VOPD3
4104 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4105 if (InvalidCompOprIdx.has_value()) {
4106 // If failed operand is dst it is better to show error about VOPD3
4107 // instruction as it has more capabilities and error message will be
4108 // more informative. If the dst is not legal for VOPD3, then it is not
4109 // legal for VOPD either.
4110 if (*InvalidCompOprIdx == VOPD::Component::DST)
4111 return true;
4112
4113 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4114 // with a conflict in tied implicit src2 of fmac and no asm operand to
4115 // point to.
4116 return false;
4117 }
4118 return true;
4119}
4120
4121 // \returns true if a VOPD3 instruction can also be represented as a shorter
4122// VOPD encoding.
4123bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4124 const unsigned Opcode = Inst.getOpcode();
4125 const auto &II = getVOPDInstInfo(Opcode, &MII);
4126 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4127 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4128 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4129 return false;
4130
4131 // This is an awkward exception: the VOPD3 variant of V_DUAL_CNDMASK_B32 has
4132 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4133 // be parsed as VOPD which does not accept src2.
4134 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4135 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4136 return false;
4137
4138 // If any modifiers are set this cannot be VOPD.
4139 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4140 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4141 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4142 int I = getNamedOperandIdx(Opcode, OpName);
4143 if (I == -1)
4144 continue;
4145 if (Inst.getOperand(I).getImm())
4146 return false;
4147 }
4148
4149 return !tryVOPD3(Inst);
4150}
4151
4152// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4153// form but switch to VOPD3 otherwise.
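// That is, a form parsed as VOPD3 is re-tried as plain VOPD when the shorter
// encoding can also express it (see tryVOPD above), and vice versa.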
4154bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4155 const unsigned Opcode = Inst.getOpcode();
4156 if (!isGFX1250() || !isVOPD(Opcode))
4157 return false;
4158
4159 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4160 return tryVOPD(Inst);
4161 return tryVOPD3(Inst);
4162}
4163
4164bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4165
4166 const unsigned Opc = Inst.getOpcode();
4167 const MCInstrDesc &Desc = MII.get(Opc);
4168
4169 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4170 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4171 assert(ClampIdx != -1);
4172 return Inst.getOperand(ClampIdx).getImm() == 0;
4173 }
4174
4175 return true;
4176}
4177
4180
4181bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
4182 const SMLoc &IDLoc) {
4183
4184 const unsigned Opc = Inst.getOpcode();
4185 const MCInstrDesc &Desc = MII.get(Opc);
4186
4187 if ((Desc.TSFlags & MIMGFlags) == 0)
4188 return true;
4189
4190 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4191 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4192 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4193
4194 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4195 return true;
4196
4197 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4198 return true;
4199
4200 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
4201 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4202 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4203 if (DMask == 0)
4204 DMask = 1;
4205
4206 bool IsPackedD16 = false;
4207 unsigned DataSize =
4208 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4209 if (hasPackedD16()) {
4210 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4211 IsPackedD16 = D16Idx >= 0;
4212 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4213 DataSize = (DataSize + 1) / 2;
4214 }
4215
4216 if ((VDataSize / 4) == DataSize + TFESize)
4217 return true;
4218
4219 StringRef Modifiers;
4220 if (isGFX90A())
4221 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4222 else
4223 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4224
4225 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4226 return false;
4227}
4228
4229bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
4230 const SMLoc &IDLoc) {
4231 const unsigned Opc = Inst.getOpcode();
4232 const MCInstrDesc &Desc = MII.get(Opc);
4233
4234 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4235 return true;
4236
4237 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4238
4239 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4240 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4241 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4242 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4243 ? AMDGPU::OpName::srsrc
4244 : AMDGPU::OpName::rsrc;
4245 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4246 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4247 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4248
4249 assert(VAddr0Idx != -1);
4250 assert(SrsrcIdx != -1);
4251 assert(SrsrcIdx > VAddr0Idx);
4252
4253 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4254 if (BaseOpcode->BVH) {
4255 if (IsA16 == BaseOpcode->A16)
4256 return true;
4257 Error(IDLoc, "image address size does not match a16");
4258 return false;
4259 }
4260
4261 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4262 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4263 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4264 unsigned ActualAddrSize =
4265 IsNSA ? SrsrcIdx - VAddr0Idx
4266 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
4267
4268 unsigned ExpectedAddrSize =
4269 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4270
4271 if (IsNSA) {
4272 if (hasPartialNSAEncoding() &&
4273 ExpectedAddrSize >
4275 int VAddrLastIdx = SrsrcIdx - 1;
4276 unsigned VAddrLastSize =
4277 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
4278
4279 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4280 }
4281 } else {
4282 if (ExpectedAddrSize > 12)
4283 ExpectedAddrSize = 16;
4284
4285 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4286 // This provides backward compatibility for assembly created
4287 // before 160b/192b/224b types were directly supported.
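// e.g. an opcode that needs only 6 address VGPRs may still be written with an
// 8-register tuple such as v[0:7] and is accepted here.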
4288 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4289 return true;
4290 }
4291
4292 if (ActualAddrSize == ExpectedAddrSize)
4293 return true;
4294
4295 Error(IDLoc, "image address size does not match dim and a16");
4296 return false;
4297}
4298
4299bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4300
4301 const unsigned Opc = Inst.getOpcode();
4302 const MCInstrDesc &Desc = MII.get(Opc);
4303
4304 if ((Desc.TSFlags & MIMGFlags) == 0)
4305 return true;
4306 if (!Desc.mayLoad() || !Desc.mayStore())
4307 return true; // Not atomic
4308
4309 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4310 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4311
4312 // This is an incomplete check because image_atomic_cmpswap
4313 // may only use 0x3 and 0xf while other atomic operations
4314 // may use 0x1 and 0x3. However these limitations are
4315 // verified when we check that dmask matches dst size.
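// e.g. 32-bit image atomics use dmask:0x1, 64-bit ones use dmask:0x3, and
// image_atomic_cmpswap uses 0x3 or 0xf for its data pair.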
4316 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4317}
4318
4319bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4320
4321 const unsigned Opc = Inst.getOpcode();
4322 const MCInstrDesc &Desc = MII.get(Opc);
4323
4324 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4325 return true;
4326
4327 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4328 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4329
4330 // GATHER4 instructions use dmask in a different fashion compared to
4331 // other MIMG instructions. The only useful DMASK values are
4332 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4333 // (red,red,red,red) etc.) The ISA document doesn't mention
4334 // this.
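// e.g. dmask:0x2 gathers the green component from the four sampled texels;
// combined masks such as 0x3 are rejected below.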
4335 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4336}
4337
4338bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4339 const OperandVector &Operands) {
4340 if (!isGFX10Plus())
4341 return true;
4342
4343 const unsigned Opc = Inst.getOpcode();
4344 const MCInstrDesc &Desc = MII.get(Opc);
4345
4346 if ((Desc.TSFlags & MIMGFlags) == 0)
4347 return true;
4348
4349 // image_bvh_intersect_ray instructions do not have dim
4351 return true;
4352
4353 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4354 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4355 if (Op.isDim())
4356 return true;
4357 }
4358 return false;
4359}
4360
4361bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4362 const unsigned Opc = Inst.getOpcode();
4363 const MCInstrDesc &Desc = MII.get(Opc);
4364
4365 if ((Desc.TSFlags & MIMGFlags) == 0)
4366 return true;
4367
4368 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4369 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4370 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4371
4372 if (!BaseOpcode->MSAA)
4373 return true;
4374
4375 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4376 assert(DimIdx != -1);
4377
4378 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4379 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4380
4381 return DimInfo->MSAA;
4382}
4383
4384static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4385{
4386 switch (Opcode) {
4387 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4388 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4389 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4390 return true;
4391 default:
4392 return false;
4393 }
4394}
4395
4396 // movrels* opcodes should only allow VGPRs as src0.
4397// This is specified in .td description for vop1/vop3,
4398// but sdwa is handled differently. See isSDWAOperand.
4399bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4400 const OperandVector &Operands) {
4401
4402 const unsigned Opc = Inst.getOpcode();
4403 const MCInstrDesc &Desc = MII.get(Opc);
4404
4405 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4406 return true;
4407
4408 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4409 assert(Src0Idx != -1);
4410
4411 SMLoc ErrLoc;
4412 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4413 if (Src0.isReg()) {
4414 auto Reg = mc2PseudoReg(Src0.getReg());
4415 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4416 if (!isSGPR(Reg, TRI))
4417 return true;
4418 ErrLoc = getRegLoc(Reg, Operands);
4419 } else {
4420 ErrLoc = getConstLoc(Operands);
4421 }
4422
4423 Error(ErrLoc, "source operand must be a VGPR");
4424 return false;
4425}
4426
4427bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4428 const OperandVector &Operands) {
4429
4430 const unsigned Opc = Inst.getOpcode();
4431
4432 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4433 return true;
4434
4435 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4436 assert(Src0Idx != -1);
4437
4438 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4439 if (!Src0.isReg())
4440 return true;
4441
4442 auto Reg = mc2PseudoReg(Src0.getReg());
4443 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4444 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4445 Error(getRegLoc(Reg, Operands),
4446 "source operand must be either a VGPR or an inline constant");
4447 return false;
4448 }
4449
4450 return true;
4451}
4452
4453bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4454 const OperandVector &Operands) {
4455 unsigned Opcode = Inst.getOpcode();
4456 const MCInstrDesc &Desc = MII.get(Opcode);
4457
4458 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4459 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4460 return true;
4461
4462 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4463 if (Src2Idx == -1)
4464 return true;
4465
4466 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4467 Error(getConstLoc(Operands),
4468 "inline constants are not allowed for this operand");
4469 return false;
4470 }
4471
4472 return true;
4473}
4474
4475bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4476 const OperandVector &Operands) {
4477 const unsigned Opc = Inst.getOpcode();
4478 const MCInstrDesc &Desc = MII.get(Opc);
4479
4480 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4481 return true;
4482
4483 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4484 if (BlgpIdx != -1) {
4485 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4486 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4487
4488 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4489 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4490
4491 // Validate the correct register size was used for the floating point
4492 // format operands
4493
4494 bool Success = true;
4495 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4496 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4497 Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()),
4498 Operands),
4499 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4500 Success = false;
4501 }
4502
4503 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4504 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4505 Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src1Idx).getReg()),
4506 Operands),
4507 "wrong register tuple size for blgp value " + Twine(BLGP));
4508 Success = false;
4509 }
4510
4511 return Success;
4512 }
4513 }
4514
4515 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4516 if (Src2Idx == -1)
4517 return true;
4518
4519 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4520 if (!Src2.isReg())
4521 return true;
4522
4523 MCRegister Src2Reg = Src2.getReg();
4524 MCRegister DstReg = Inst.getOperand(0).getReg();
4525 if (Src2Reg == DstReg)
4526 return true;
4527
4528 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4529 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4530 return true;
4531
4532 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4533 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4534 "source 2 operand must not partially overlap with dst");
4535 return false;
4536 }
4537
4538 return true;
4539}
4540
4541bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4542 switch (Inst.getOpcode()) {
4543 default:
4544 return true;
4545 case V_DIV_SCALE_F32_gfx6_gfx7:
4546 case V_DIV_SCALE_F32_vi:
4547 case V_DIV_SCALE_F32_gfx10:
4548 case V_DIV_SCALE_F64_gfx6_gfx7:
4549 case V_DIV_SCALE_F64_vi:
4550 case V_DIV_SCALE_F64_gfx10:
4551 break;
4552 }
4553
4554 // TODO: Check that src0 = src1 or src2.
4555
4556 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4557 AMDGPU::OpName::src2_modifiers,
4558 AMDGPU::OpName::src2_modifiers}) {
4559 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4560 .getImm() &
4562 return false;
4563 }
4564 }
4565
4566 return true;
4567}
4568
4569bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4570
4571 const unsigned Opc = Inst.getOpcode();
4572 const MCInstrDesc &Desc = MII.get(Opc);
4573
4574 if ((Desc.TSFlags & MIMGFlags) == 0)
4575 return true;
4576
4577 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4578 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4579 if (isCI() || isSI())
4580 return false;
4581 }
4582
4583 return true;
4584}
4585
4586bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4587 const unsigned Opc = Inst.getOpcode();
4588 const MCInstrDesc &Desc = MII.get(Opc);
4589
4590 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4591 return true;
4592
4593 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4594
4595 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4596}
4597
4598static bool IsRevOpcode(const unsigned Opcode)
4599{
4600 switch (Opcode) {
4601 case AMDGPU::V_SUBREV_F32_e32:
4602 case AMDGPU::V_SUBREV_F32_e64:
4603 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4604 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4605 case AMDGPU::V_SUBREV_F32_e32_vi:
4606 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4607 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4608 case AMDGPU::V_SUBREV_F32_e64_vi:
4609
4610 case AMDGPU::V_SUBREV_CO_U32_e32:
4611 case AMDGPU::V_SUBREV_CO_U32_e64:
4612 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4613 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4614
4615 case AMDGPU::V_SUBBREV_U32_e32:
4616 case AMDGPU::V_SUBBREV_U32_e64:
4617 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4618 case AMDGPU::V_SUBBREV_U32_e32_vi:
4619 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4620 case AMDGPU::V_SUBBREV_U32_e64_vi:
4621
4622 case AMDGPU::V_SUBREV_U32_e32:
4623 case AMDGPU::V_SUBREV_U32_e64:
4624 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4625 case AMDGPU::V_SUBREV_U32_e32_vi:
4626 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4627 case AMDGPU::V_SUBREV_U32_e64_vi:
4628
4629 case AMDGPU::V_SUBREV_F16_e32:
4630 case AMDGPU::V_SUBREV_F16_e64:
4631 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4632 case AMDGPU::V_SUBREV_F16_e32_vi:
4633 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4634 case AMDGPU::V_SUBREV_F16_e64_vi:
4635
4636 case AMDGPU::V_SUBREV_U16_e32:
4637 case AMDGPU::V_SUBREV_U16_e64:
4638 case AMDGPU::V_SUBREV_U16_e32_vi:
4639 case AMDGPU::V_SUBREV_U16_e64_vi:
4640
4641 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4642 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4643 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4644
4645 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4646 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4647
4648 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4649 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4650
4651 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4652 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4653
4654 case AMDGPU::V_LSHRREV_B32_e32:
4655 case AMDGPU::V_LSHRREV_B32_e64:
4656 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4657 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4658 case AMDGPU::V_LSHRREV_B32_e32_vi:
4659 case AMDGPU::V_LSHRREV_B32_e64_vi:
4660 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4661 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4662
4663 case AMDGPU::V_ASHRREV_I32_e32:
4664 case AMDGPU::V_ASHRREV_I32_e64:
4665 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4666 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4667 case AMDGPU::V_ASHRREV_I32_e32_vi:
4668 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4669 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4670 case AMDGPU::V_ASHRREV_I32_e64_vi:
4671
4672 case AMDGPU::V_LSHLREV_B32_e32:
4673 case AMDGPU::V_LSHLREV_B32_e64:
4674 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4675 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4676 case AMDGPU::V_LSHLREV_B32_e32_vi:
4677 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4678 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4679 case AMDGPU::V_LSHLREV_B32_e64_vi:
4680
4681 case AMDGPU::V_LSHLREV_B16_e32:
4682 case AMDGPU::V_LSHLREV_B16_e64:
4683 case AMDGPU::V_LSHLREV_B16_e32_vi:
4684 case AMDGPU::V_LSHLREV_B16_e64_vi:
4685 case AMDGPU::V_LSHLREV_B16_gfx10:
4686
4687 case AMDGPU::V_LSHRREV_B16_e32:
4688 case AMDGPU::V_LSHRREV_B16_e64:
4689 case AMDGPU::V_LSHRREV_B16_e32_vi:
4690 case AMDGPU::V_LSHRREV_B16_e64_vi:
4691 case AMDGPU::V_LSHRREV_B16_gfx10:
4692
4693 case AMDGPU::V_ASHRREV_I16_e32:
4694 case AMDGPU::V_ASHRREV_I16_e64:
4695 case AMDGPU::V_ASHRREV_I16_e32_vi:
4696 case AMDGPU::V_ASHRREV_I16_e64_vi:
4697 case AMDGPU::V_ASHRREV_I16_gfx10:
4698
4699 case AMDGPU::V_LSHLREV_B64_e64:
4700 case AMDGPU::V_LSHLREV_B64_gfx10:
4701 case AMDGPU::V_LSHLREV_B64_vi:
4702
4703 case AMDGPU::V_LSHRREV_B64_e64:
4704 case AMDGPU::V_LSHRREV_B64_gfx10:
4705 case AMDGPU::V_LSHRREV_B64_vi:
4706
4707 case AMDGPU::V_ASHRREV_I64_e64:
4708 case AMDGPU::V_ASHRREV_I64_gfx10:
4709 case AMDGPU::V_ASHRREV_I64_vi:
4710
4711 case AMDGPU::V_PK_LSHLREV_B16:
4712 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4713 case AMDGPU::V_PK_LSHLREV_B16_vi:
4714
4715 case AMDGPU::V_PK_LSHRREV_B16:
4716 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4717 case AMDGPU::V_PK_LSHRREV_B16_vi:
4718 case AMDGPU::V_PK_ASHRREV_I16:
4719 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4720 case AMDGPU::V_PK_ASHRREV_I16_vi:
4721 return true;
4722 default:
4723 return false;
4724 }
4725}
4726
4727std::optional<StringRef>
4728AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4729
4730 using namespace SIInstrFlags;
4731 const unsigned Opcode = Inst.getOpcode();
4732 const MCInstrDesc &Desc = MII.get(Opcode);
4733
4734 // lds_direct register is defined so that it can be used
4735 // with 9-bit operands only. Ignore encodings which do not accept these.
4736 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4737 if ((Desc.TSFlags & Enc) == 0)
4738 return std::nullopt;
4739
4740 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4741 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4742 if (SrcIdx == -1)
4743 break;
4744 const auto &Src = Inst.getOperand(SrcIdx);
4745 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4746
4747 if (isGFX90A() || isGFX11Plus())
4748 return StringRef("lds_direct is not supported on this GPU");
4749
4750 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4751 return StringRef("lds_direct cannot be used with this instruction");
4752
4753 if (SrcName != OpName::src0)
4754 return StringRef("lds_direct may be used as src0 only");
4755 }
4756 }
4757
4758 return std::nullopt;
4759}
4760
4761SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4762 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4763 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4764 if (Op.isFlatOffset())
4765 return Op.getStartLoc();
4766 }
4767 return getLoc();
4768}
4769
4770bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4771 const OperandVector &Operands) {
4772 auto Opcode = Inst.getOpcode();
4773 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4774 if (OpNum == -1)
4775 return true;
4776
4777 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4778 if ((TSFlags & SIInstrFlags::FLAT))
4779 return validateFlatOffset(Inst, Operands);
4780
4781 if ((TSFlags & SIInstrFlags::SMRD))
4782 return validateSMEMOffset(Inst, Operands);
4783
4784 const auto &Op = Inst.getOperand(OpNum);
4785 // GFX12+ buffer ops: InstOffset is a signed 24-bit field, but must not be negative.
4786 if (isGFX12Plus() &&
4787 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4788 const unsigned OffsetSize = 24;
4789 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4790 Error(getFlatOffsetLoc(Operands),
4791 Twine("expected a ") + Twine(OffsetSize - 1) +
4792 "-bit unsigned offset for buffer ops");
4793 return false;
4794 }
4795 } else {
4796 const unsigned OffsetSize = 16;
4797 if (!isUIntN(OffsetSize, Op.getImm())) {
4798 Error(getFlatOffsetLoc(Operands),
4799 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4800 return false;
4801 }
4802 }
4803 return true;
4804}
4805
4806bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4807 const OperandVector &Operands) {
4808 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4809 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4810 return true;
4811
4812 auto Opcode = Inst.getOpcode();
4813 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4814 assert(OpNum != -1);
4815
4816 const auto &Op = Inst.getOperand(OpNum);
4817 if (!hasFlatOffsets() && Op.getImm() != 0) {
4818 Error(getFlatOffsetLoc(Operands),
4819 "flat offset modifier is not supported on this GPU");
4820 return false;
4821 }
4822
4823 // For pre-GFX12 FLAT instructions the offset must be positive;
4824 // MSB is ignored and forced to zero.
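// e.g. "flat_load_dword v0, v[2:3] offset:-8" is rejected below when negative
// offsets are not allowed for the opcode/target.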
4825 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4826 bool AllowNegative =
4828 isGFX12Plus();
4829 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4830 Error(getFlatOffsetLoc(Operands),
4831 Twine("expected a ") +
4832 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4833 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4834 return false;
4835 }
4836
4837 return true;
4838}
4839
4840SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4841 // Start with second operand because SMEM Offset cannot be dst or src0.
4842 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4843 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4844 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4845 return Op.getStartLoc();
4846 }
4847 return getLoc();
4848}
4849
4850bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4851 const OperandVector &Operands) {
4852 if (isCI() || isSI())
4853 return true;
4854
4855 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4856 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4857 return true;
4858
4859 auto Opcode = Inst.getOpcode();
4860 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4861 if (OpNum == -1)
4862 return true;
4863
4864 const auto &Op = Inst.getOperand(OpNum);
4865 if (!Op.isImm())
4866 return true;
4867
4868 uint64_t Offset = Op.getImm();
4869 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4872 return true;
4873
4874 Error(getSMEMOffsetLoc(Operands),
4875 isGFX12Plus() && IsBuffer
4876 ? "expected a 23-bit unsigned offset for buffer ops"
4877 : isGFX12Plus() ? "expected a 24-bit signed offset"
4878 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4879 : "expected a 21-bit signed offset");
4880
4881 return false;
4882}
4883
4884bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4885 unsigned Opcode = Inst.getOpcode();
4886 const MCInstrDesc &Desc = MII.get(Opcode);
4887 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4888 return true;
4889
4890 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4891 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4892
4893 const int OpIndices[] = { Src0Idx, Src1Idx };
4894
4895 unsigned NumExprs = 0;
4896 unsigned NumLiterals = 0;
4897 uint64_t LiteralValue;
4898
4899 for (int OpIdx : OpIndices) {
4900 if (OpIdx == -1) break;
4901
4902 const MCOperand &MO = Inst.getOperand(OpIdx);
4903 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4904 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4905 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4906 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4907 if (NumLiterals == 0 || LiteralValue != Value) {
4908 LiteralValue = Value;
4909 ++NumLiterals;
4910 }
4911 } else if (MO.isExpr()) {
4912 ++NumExprs;
4913 }
4914 }
4915 }
4916
4917 return NumLiterals + NumExprs <= 1;
4918}
4919
4920bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4921 const unsigned Opc = Inst.getOpcode();
4922 if (isPermlane16(Opc)) {
4923 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4924 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4925
4926 if (OpSel & ~3)
4927 return false;
4928 }
4929
4930 uint64_t TSFlags = MII.get(Opc).TSFlags;
4931
4932 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4933 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4934 if (OpSelIdx != -1) {
4935 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4936 return false;
4937 }
4938 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4939 if (OpSelHiIdx != -1) {
4940 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4941 return false;
4942 }
4943 }
4944
4945 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4946 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4947 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4948 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4949 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4950 if (OpSel & 3)
4951 return false;
4952 }
4953
4954 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4955 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4956 // the first SGPR and use it for both the low and high operations.
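// e.g. on gfx12, if src0 is an SGPR pair, op_sel/op_sel_hi bits selecting its
// high half are rejected here because the HW broadcasts the first SGPR to both
// halves.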
4957 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4958 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4959 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4960 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4961 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4962
4963 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4964 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4965 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4966 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4967
4968 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4969
4970 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
4971 unsigned Mask = 1U << Index;
4972 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
4973 };
4974
4975 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
4976 !VerifyOneSGPR(/*Index=*/0))
4977 return false;
4978 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
4979 !VerifyOneSGPR(/*Index=*/1))
4980 return false;
4981
4982 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4983 if (Src2Idx != -1) {
4984 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4985 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
4986 !VerifyOneSGPR(/*Index=*/2))
4987 return false;
4988 }
4989 }
4990
4991 return true;
4992}
4993
4994bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4995 if (!hasTrue16Insts())
4996 return true;
4997 const MCRegisterInfo *MRI = getMRI();
4998 const unsigned Opc = Inst.getOpcode();
4999 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5000 if (OpSelIdx == -1)
5001 return true;
5002 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
5003 // If the value is 0 we could have a default OpSel Operand, so conservatively
5004 // allow it.
5005 if (OpSelOpValue == 0)
5006 return true;
5007 unsigned OpCount = 0;
5008 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
5009 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
5010 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
5011 if (OpIdx == -1)
5012 continue;
5013 const MCOperand &Op = Inst.getOperand(OpIdx);
5014 if (Op.isReg() &&
5015 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
5016 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
5017 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
5018 if (OpSelOpIsHi != VGPRSuffixIsHi)
5019 return false;
5020 }
5021 ++OpCount;
5022 }
5023
5024 return true;
5025}
5026
5027bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
5028 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5029
5030 const unsigned Opc = Inst.getOpcode();
5031 uint64_t TSFlags = MII.get(Opc).TSFlags;
5032
5033 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
5034 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
5035 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
5036 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
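// e.g. for the v_dot4 fp8/bf8 variants, a neg_lo bit set for src0 or src1 is
// rejected here, while a bit set for src2 is accepted.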
5037 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
5038 !(TSFlags & SIInstrFlags::IsSWMMAC))
5039 return true;
5040
5041 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
5042 if (NegIdx == -1)
5043 return true;
5044
5045 unsigned Neg = Inst.getOperand(NegIdx).getImm();
5046
5047 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
5048 // allowed only on a subset of their src operands.
5049 // Conveniently, such instructions have no src_modifiers operand for the src
5050 // operands that do not allow neg, because those operands do not allow opsel either.
5051
5052 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5053 AMDGPU::OpName::src1_modifiers,
5054 AMDGPU::OpName::src2_modifiers};
5055
5056 for (unsigned i = 0; i < 3; ++i) {
5057 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
5058 if (Neg & (1 << i))
5059 return false;
5060 }
5061 }
5062
5063 return true;
5064}
5065
5066bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5067 const OperandVector &Operands) {
5068 const unsigned Opc = Inst.getOpcode();
5069 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5070 if (DppCtrlIdx >= 0) {
5071 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5072
5073 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5074 AMDGPU::isDPALU_DPP(MII.get(Opc), getSTI())) {
5075 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5076 // only on GFX12.
5077 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5078 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5079 : "DP ALU dpp only supports row_newbcast");
5080 return false;
5081 }
5082 }
5083
5084 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5085 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5086
5087 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5088 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5089 if (Src1Idx >= 0) {
5090 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5091 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5092 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5093 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
5094 SMLoc S = getRegLoc(Reg, Operands);
5095 Error(S, "invalid operand for instruction");
5096 return false;
5097 }
5098 if (Src1.isImm()) {
5099 Error(getInstLoc(Operands),
5100 "src1 immediate operand invalid for instruction");
5101 return false;
5102 }
5103 }
5104 }
5105
5106 return true;
5107}
5108
5109// Check if VCC register matches wavefront size
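// e.g. in wave32 mode only vcc_lo is accepted here; the full vcc pair is valid
// only in wave64 mode.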
5110bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5111 auto FB = getFeatureBits();
5112 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
5113 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
5114}
5115
5116// One unique literal can be used. VOP3 literal is only allowed in GFX10+
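// e.g. reusing the same 32-bit literal value in two source operands is fine,
// but two different literal values in one instruction trigger the error below.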
5117bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5118 const OperandVector &Operands) {
5119 unsigned Opcode = Inst.getOpcode();
5120 const MCInstrDesc &Desc = MII.get(Opcode);
5121 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5122 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5123 !HasMandatoryLiteral && !isVOPD(Opcode))
5124 return true;
5125
5126 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5127
5128 unsigned NumExprs = 0;
5129 unsigned NumLiterals = 0;
5130 uint64_t LiteralValue;
5131
5132 for (int OpIdx : OpIndices) {
5133 if (OpIdx == -1)
5134 continue;
5135
5136 const MCOperand &MO = Inst.getOperand(OpIdx);
5137 if (!MO.isImm() && !MO.isExpr())
5138 continue;
5139 if (!isSISrcOperand(Desc, OpIdx))
5140 continue;
5141
5142 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
5143 uint64_t Value = static_cast<uint64_t>(MO.getImm());
5144 bool IsForcedFP64 =
5145 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5147 HasMandatoryLiteral);
5148 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5149 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5150 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
5151
5152 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5153 !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
5154 Error(getLitLoc(Operands), "invalid operand for instruction");
5155 return false;
5156 }
5157
5158 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5159 Value = Hi_32(Value);
5160
5161 if (NumLiterals == 0 || LiteralValue != Value) {
5162 LiteralValue = Value;
5163 ++NumLiterals;
5164 }
5165 } else if (MO.isExpr()) {
5166 ++NumExprs;
5167 }
5168 }
5169 NumLiterals += NumExprs;
5170
5171 if (!NumLiterals)
5172 return true;
5173
5174 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
5175 Error(getLitLoc(Operands), "literal operands are not supported");
5176 return false;
5177 }
5178
5179 if (NumLiterals > 1) {
5180 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
5181 return false;
5182 }
5183
5184 return true;
5185}
5186
5187// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5188static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5189 const MCRegisterInfo *MRI) {
5190 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5191 if (OpIdx < 0)
5192 return -1;
5193
5194 const MCOperand &Op = Inst.getOperand(OpIdx);
5195 if (!Op.isReg())
5196 return -1;
5197
5198 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5199 auto Reg = Sub ? Sub : Op.getReg();
5200 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5201 return AGPR32.contains(Reg) ? 1 : 0;
5202}
5203
5204bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5205 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5206 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5208 SIInstrFlags::DS)) == 0)
5209 return true;
5210
5211 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5212 ? AMDGPU::OpName::data0
5213 : AMDGPU::OpName::vdata;
5214
5215 const MCRegisterInfo *MRI = getMRI();
5216 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5217 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5218
5219 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5220 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5221 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5222 return false;
5223 }
5224
5225 auto FB = getFeatureBits();
5226 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5227 if (DataAreg < 0 || DstAreg < 0)
5228 return true;
5229 return DstAreg == DataAreg;
5230 }
5231
5232 return DstAreg < 1 && DataAreg < 1;
5233}
5234
5235bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5236 auto FB = getFeatureBits();
5237 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5238 return true;
5239
5240 unsigned Opc = Inst.getOpcode();
5241 const MCRegisterInfo *MRI = getMRI();
5242 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950 that allows
5243 // unaligned VGPR. All others only allow even aligned VGPRs.
5244 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5245 return true;
5246
5247 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5248 switch (Opc) {
5249 default:
5250 break;
5251 case AMDGPU::DS_LOAD_TR6_B96:
5252 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5253 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250 that
5254 // allows unaligned VGPR. All others only allow even aligned VGPRs.
5255 return true;
5256 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5257 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5258 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250 that
5259 // allows unaligned VGPR for vdst, but other operands still only allow
5260 // even aligned VGPRs.
5261 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5262 if (VAddrIdx != -1) {
5263 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5264 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5265 if ((Sub - AMDGPU::VGPR0) & 1)
5266 return false;
5267 }
5268 return true;
5269 }
5270 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5271 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5272 return true;
5273 }
5274 }
5275
5276 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5277 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5278 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5279 const MCOperand &Op = Inst.getOperand(I);
5280 if (!Op.isReg())
5281 continue;
5282
5283 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5284 if (!Sub)
5285 continue;
5286
5287 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5288 return false;
5289 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5290 return false;
5291 }
5292
5293 return true;
5294}
5295
5296SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5297 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5298 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5299 if (Op.isBLGP())
5300 return Op.getStartLoc();
5301 }
5302 return SMLoc();
5303}
5304
5305bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5306 const OperandVector &Operands) {
5307 unsigned Opc = Inst.getOpcode();
5308 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5309 if (BlgpIdx == -1)
5310 return true;
5311 SMLoc BLGPLoc = getBLGPLoc(Operands);
5312 if (!BLGPLoc.isValid())
5313 return true;
5314 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5315 auto FB = getFeatureBits();
5316 bool UsesNeg = false;
5317 if (FB[AMDGPU::FeatureGFX940Insts]) {
5318 switch (Opc) {
5319 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5320 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5321 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5322 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5323 UsesNeg = true;
5324 }
5325 }
5326
5327 if (IsNeg == UsesNeg)
5328 return true;
5329
5330 Error(BLGPLoc,
5331 UsesNeg ? "invalid modifier: blgp is not supported"
5332 : "invalid modifier: neg is not supported");
5333
5334 return false;
5335}
5336
5337bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5338 const OperandVector &Operands) {
5339 if (!isGFX11Plus())
5340 return true;
5341
5342 unsigned Opc = Inst.getOpcode();
5343 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5344 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5345 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5346 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5347 return true;
5348
5349 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5350 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5351 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5352 if (Reg == AMDGPU::SGPR_NULL)
5353 return true;
5354
5355 SMLoc RegLoc = getRegLoc(Reg, Operands);
5356 Error(RegLoc, "src0 must be null");
5357 return false;
5358}
5359
5360bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5361 const OperandVector &Operands) {
5362 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5363 if ((TSFlags & SIInstrFlags::DS) == 0)
5364 return true;
5365 if (TSFlags & SIInstrFlags::GWS)
5366 return validateGWS(Inst, Operands);
5367 // Only validate GDS for non-GWS instructions.
5368 if (hasGDS())
5369 return true;
5370 int GDSIdx =
5371 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5372 if (GDSIdx < 0)
5373 return true;
5374 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5375 if (GDS) {
5376 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5377 Error(S, "gds modifier is not supported on this GPU");
5378 return false;
5379 }
5380 return true;
5381}
5382
5383// gfx90a has an undocumented limitation:
5384// DS_GWS opcodes must use even aligned registers.
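// For example, a data0 operand in v0 or v2 passes this check, while v1 or v3
// is rejected with "vgpr must be even aligned".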
5385bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5386 const OperandVector &Operands) {
5387 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5388 return true;
5389
5390 int Opc = Inst.getOpcode();
5391 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5392 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5393 return true;
5394
5395 const MCRegisterInfo *MRI = getMRI();
5396 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5397 int Data0Pos =
5398 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5399 assert(Data0Pos != -1);
5400 auto Reg = Inst.getOperand(Data0Pos).getReg();
5401 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5402 if (RegIdx & 1) {
5403 SMLoc RegLoc = getRegLoc(Reg, Operands);
5404 Error(RegLoc, "vgpr must be even aligned");
5405 return false;
5406 }
5407
5408 return true;
5409}
5410
5411bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5412 const OperandVector &Operands,
5413 const SMLoc &IDLoc) {
5414 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5415 AMDGPU::OpName::cpol);
5416 if (CPolPos == -1)
5417 return true;
5418
5419 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5420
5421 if (!isGFX1250()) {
5422 if (CPol & CPol::SCAL) {
5423 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5424 StringRef CStr(S.getPointer());
5425 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5426 Error(S, "scale_offset is not supported on this GPU");
5427 }
5428 if (CPol & CPol::NV) {
5429 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5430 StringRef CStr(S.getPointer());
5431 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5432 Error(S, "nv is not supported on this GPU");
5433 }
5434 }
5435
5436 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5437 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5438 StringRef CStr(S.getPointer());
5439 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5440 Error(S, "scale_offset is not supported for this instruction");
5441 }
5442
5443 if (isGFX12Plus())
5444 return validateTHAndScopeBits(Inst, Operands, CPol);
5445
5446 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5447 if (TSFlags & SIInstrFlags::SMRD) {
5448 if (CPol && (isSI() || isCI())) {
5449 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5450 Error(S, "cache policy is not supported for SMRD instructions");
5451 return false;
5452 }
5453 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5454 Error(IDLoc, "invalid cache policy for SMEM instruction");
5455 return false;
5456 }
5457 }
5458
5459 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5460 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5461 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5462 SIInstrFlags::FLAT;
5463 if (!(TSFlags & AllowSCCModifier)) {
5464 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5465 StringRef CStr(S.getPointer());
5466 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5467 Error(S,
5468 "scc modifier is not supported for this instruction on this GPU");
5469 return false;
5470 }
5471 }
5472
5473 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
5474 return true;
5475
5476 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5477 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5478 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5479 : "instruction must use glc");
5480 return false;
5481 }
5482 } else {
5483 if (CPol & CPol::GLC) {
5484 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5485 StringRef CStr(S.getPointer());
5486 S = SMLoc::getFromPointer(
5487 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5488 Error(S, isGFX940() ? "instruction must not use sc0"
5489 : "instruction must not use glc");
5490 return false;
5491 }
5492 }
5493
5494 return true;
5495}
5496
5497bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5498 const OperandVector &Operands,
5499 const unsigned CPol) {
5500 const unsigned TH = CPol & AMDGPU::CPol::TH;
5501 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5502
5503 const unsigned Opcode = Inst.getOpcode();
5504 const MCInstrDesc &TID = MII.get(Opcode);
5505
5506 auto PrintError = [&](StringRef Msg) {
5507 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5508 Error(S, Msg);
5509 return false;
5510 };
5511
5512 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5515 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5516
5517 if (TH == 0)
5518 return true;
5519
5520 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5521 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5522 (TH == AMDGPU::CPol::TH_NT_HT)))
5523 return PrintError("invalid th value for SMEM instruction");
5524
5525 if (TH == AMDGPU::CPol::TH_BYPASS) {
5526 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5527 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5528 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5529 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5530 return PrintError("scope and th combination is not valid");
5531 }
5532
5533 unsigned THType = AMDGPU::getTemporalHintType(TID);
5534 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5535 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5536 return PrintError("invalid th value for atomic instructions");
5537 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5538 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5539 return PrintError("invalid th value for store instructions");
5540 } else {
5541 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5542 return PrintError("invalid th value for load instructions");
5543 }
5544
5545 return true;
5546}
5547
5548bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5549 const OperandVector &Operands) {
5550 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5551 if (Desc.mayStore() &&
5552 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5553 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5554 if (Loc != getInstLoc(Operands)) {
5555 Error(Loc, "TFE modifier has no meaning for store instructions");
5556 return false;
5557 }
5558 }
5559
5560 return true;
5561}
5562
5563bool AMDGPUAsmParser::validateSetVgprMSB(const MCInst &Inst,
5564 const OperandVector &Operands) {
5565 if (Inst.getOpcode() != AMDGPU::S_SET_VGPR_MSB_gfx12)
5566 return true;
5567
5568 int Simm16Pos =
5569 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::simm16);
5570 if ((unsigned)Inst.getOperand(Simm16Pos).getImm() > 255) {
5571 SMLoc Loc = Operands[1]->getStartLoc();
5572 Error(Loc, "s_set_vgpr_msb accepts values in range [0..255]");
5573 return false;
5574 }
5575
5576 return true;
5577}
5578
5579bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5580 const OperandVector &Operands) {
5581 unsigned Opc = Inst.getOpcode();
5582 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5583 const MCInstrDesc &Desc = MII.get(Opc);
5584
5585 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
5586 int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
5587 if (FmtIdx == -1)
5588 return true;
5589 unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
5590 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5591 unsigned RegSize =
5592 TRI->getRegClass(Desc.operands()[SrcIdx].RegClass).getSizeInBits();
5593
5595 return true;
5596
5597 static const char *FmtNames[] = {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
5598 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
5599 "MATRIX_FMT_FP4"};
5600
5601 Error(getRegLoc(mc2PseudoReg(Inst.getOperand(SrcIdx).getReg()), Operands),
5602 "wrong register tuple size for " + Twine(FmtNames[Fmt]));
5603 return false;
5604 };
5605
5606 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5607 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5608}
5609
5610bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5611 const SMLoc &IDLoc,
5612 const OperandVector &Operands) {
5613 if (auto ErrMsg = validateLdsDirect(Inst)) {
5614 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5615 return false;
5616 }
5617 if (!validateTrue16OpSel(Inst)) {
5618 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5619 "op_sel operand conflicts with 16-bit operand suffix");
5620 return false;
5621 }
5622 if (!validateSOPLiteral(Inst)) {
5623 Error(getLitLoc(Operands),
5624 "only one unique literal operand is allowed");
5625 return false;
5626 }
5627 if (!validateVOPLiteral(Inst, Operands)) {
5628 return false;
5629 }
5630 if (!validateConstantBusLimitations(Inst, Operands)) {
5631 return false;
5632 }
5633 if (!validateVOPD(Inst, Operands)) {
5634 return false;
5635 }
5636 if (!validateIntClampSupported(Inst)) {
5637 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5638 "integer clamping is not supported on this GPU");
5639 return false;
5640 }
5641 if (!validateOpSel(Inst)) {
5642 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5643 "invalid op_sel operand");
5644 return false;
5645 }
5646 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5647 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5648 "invalid neg_lo operand");
5649 return false;
5650 }
5651 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5652 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5653 "invalid neg_hi operand");
5654 return false;
5655 }
5656 if (!validateDPP(Inst, Operands)) {
5657 return false;
5658 }
5659 // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
5660 if (!validateMIMGD16(Inst)) {
5661 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5662 "d16 modifier is not supported on this GPU");
5663 return false;
5664 }
5665 if (!validateMIMGDim(Inst, Operands)) {
5666 Error(IDLoc, "missing dim operand");
5667 return false;
5668 }
5669 if (!validateTensorR128(Inst)) {
5670 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5671 "instruction must set modifier r128=0");
5672 return false;
5673 }
5674 if (!validateMIMGMSAA(Inst)) {
5675 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5676 "invalid dim; must be MSAA type");
5677 return false;
5678 }
5679 if (!validateMIMGDataSize(Inst, IDLoc)) {
5680 return false;
5681 }
5682 if (!validateMIMGAddrSize(Inst, IDLoc))
5683 return false;
5684 if (!validateMIMGAtomicDMask(Inst)) {
5685 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5686 "invalid atomic image dmask");
5687 return false;
5688 }
5689 if (!validateMIMGGatherDMask(Inst)) {
5690 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5691 "invalid image_gather dmask: only one bit must be set");
5692 return false;
5693 }
5694 if (!validateMovrels(Inst, Operands)) {
5695 return false;
5696 }
5697 if (!validateOffset(Inst, Operands)) {
5698 return false;
5699 }
5700 if (!validateMAIAccWrite(Inst, Operands)) {
5701 return false;
5702 }
5703 if (!validateMAISrc2(Inst, Operands)) {
5704 return false;
5705 }
5706 if (!validateMFMA(Inst, Operands)) {
5707 return false;
5708 }
5709 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5710 return false;
5711 }
5712
5713 if (!validateAGPRLdSt(Inst)) {
5714 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5715 ? "invalid register class: data and dst should be all VGPR or AGPR"
5716 : "invalid register class: agpr loads and stores not supported on this GPU"
5717 );
5718 return false;
5719 }
5720 if (!validateVGPRAlign(Inst)) {
5721 Error(IDLoc,
5722 "invalid register class: vgpr tuples must be 64 bit aligned");
5723 return false;
5724 }
5725 if (!validateDS(Inst, Operands)) {
5726 return false;
5727 }
5728
5729 if (!validateBLGP(Inst, Operands)) {
5730 return false;
5731 }
5732
5733 if (!validateDivScale(Inst)) {
5734 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5735 return false;
5736 }
5737 if (!validateWaitCnt(Inst, Operands)) {
5738 return false;
5739 }
5740 if (!validateTFE(Inst, Operands)) {
5741 return false;
5742 }
5743 if (!validateSetVgprMSB(Inst, Operands)) {
5744 return false;
5745 }
5746 if (!validateWMMA(Inst, Operands)) {
5747 return false;
5748 }
5749
5750 return true;
5751}
5752
5753 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5754 const FeatureBitset &FBS,
5755 unsigned VariantID = 0);
5756
5757static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5758 const FeatureBitset &AvailableFeatures,
5759 unsigned VariantID);
5760
5761bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5762 const FeatureBitset &FBS) {
5763 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5764}
5765
5766bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5767 const FeatureBitset &FBS,
5768 ArrayRef<unsigned> Variants) {
5769 for (auto Variant : Variants) {
5770 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5771 return true;
5772 }
5773
5774 return false;
5775}
5776
5777bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5778 const SMLoc &IDLoc) {
5779 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5780
5781 // Check if requested instruction variant is supported.
5782 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5783 return false;
5784
5785 // This instruction is not supported.
5786 // Clear any other pending errors because they are no longer relevant.
5787 getParser().clearPendingErrors();
5788
5789 // Requested instruction variant is not supported.
5790 // Check if any other variants are supported.
5791 StringRef VariantName = getMatchedVariantName();
5792 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5793 return Error(IDLoc,
5794 Twine(VariantName,
5795 " variant of this instruction is not supported"));
5796 }
5797
5798 // Check if this instruction may be used with a different wavesize.
5799 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5800 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5801
5802 FeatureBitset FeaturesWS32 = getFeatureBits();
5803 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5804 .flip(AMDGPU::FeatureWavefrontSize32);
5805 FeatureBitset AvailableFeaturesWS32 =
5806 ComputeAvailableFeatures(FeaturesWS32);
5807
5808 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5809 return Error(IDLoc, "instruction requires wavesize=32");
5810 }
5811
5812 // Finally check if this instruction is supported on any other GPU.
5813 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5814 return Error(IDLoc, "instruction not supported on this GPU");
5815 }
5816
5817 // Instruction not supported on any GPU. Probably a typo.
5818 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5819 return Error(IDLoc, "invalid instruction" + Suggestion);
5820}
5821
5822 static bool isInvalidVOPDY(const OperandVector &Operands,
5823 uint64_t InvalidOprIdx) {
5824 assert(InvalidOprIdx < Operands.size());
5825 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5826 if (Op.isToken() && InvalidOprIdx > 1) {
5827 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5828 return PrevOp.isToken() && PrevOp.getToken() == "::";
5829 }
5830 return false;
5831}
5832
5833bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5834 OperandVector &Operands,
5835 MCStreamer &Out,
5836 uint64_t &ErrorInfo,
5837 bool MatchingInlineAsm) {
5838 MCInst Inst;
5839 Inst.setLoc(IDLoc);
5840 unsigned Result = Match_Success;
5841 for (auto Variant : getMatchedVariants()) {
5842 uint64_t EI;
5843 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5844 Variant);
5845 // We order match statuses from least to most specific and use the most
5846 // specific status as the result:
5847 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5848 if (R == Match_Success || R == Match_MissingFeature ||
5849 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5850 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5851 Result != Match_MissingFeature)) {
5852 Result = R;
5853 ErrorInfo = EI;
5854 }
5855 if (R == Match_Success)
5856 break;
5857 }
5858
5859 if (Result == Match_Success) {
5860 if (!validateInstruction(Inst, IDLoc, Operands)) {
5861 return true;
5862 }
5863 Out.emitInstruction(Inst, getSTI());
5864 return false;
5865 }
5866
5867 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5868 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5869 return true;
5870 }
5871
5872 switch (Result) {
5873 default: break;
5874 case Match_MissingFeature:
5875 // It has been verified that the specified instruction
5876 // mnemonic is valid. A match was found but it requires
5877 // features which are not supported on this GPU.
5878 return Error(IDLoc, "operands are not valid for this GPU or mode");
5879
5880 case Match_InvalidOperand: {
5881 SMLoc ErrorLoc = IDLoc;
5882 if (ErrorInfo != ~0ULL) {
5883 if (ErrorInfo >= Operands.size()) {
5884 return Error(IDLoc, "too few operands for instruction");
5885 }
5886 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5887 if (ErrorLoc == SMLoc())
5888 ErrorLoc = IDLoc;
5889
5890 if (isInvalidVOPDY(Operands, ErrorInfo))
5891 return Error(ErrorLoc, "invalid VOPDY instruction");
5892 }
5893 return Error(ErrorLoc, "invalid operand for instruction");
5894 }
5895
5896 case Match_MnemonicFail:
5897 llvm_unreachable("Invalid instructions should have been handled already");
5898 }
5899 llvm_unreachable("Implement any new match types added!");
5900}
5901
5902bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5903 int64_t Tmp = -1;
5904 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5905 return true;
5906 }
5907 if (getParser().parseAbsoluteExpression(Tmp)) {
5908 return true;
5909 }
5910 Ret = static_cast<uint32_t>(Tmp);
5911 return false;
5912}
5913
5914bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5915 if (!getSTI().getTargetTriple().isAMDGCN())
5916 return TokError("directive only supported for amdgcn architecture");
5917
5918 std::string TargetIDDirective;
5919 SMLoc TargetStart = getTok().getLoc();
5920 if (getParser().parseEscapedString(TargetIDDirective))
5921 return true;
5922
5923 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5924 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5925 return getParser().Error(TargetRange.Start,
5926 (Twine(".amdgcn_target directive's target id ") +
5927 Twine(TargetIDDirective) +
5928 Twine(" does not match the specified target id ") +
5929 Twine(getTargetStreamer().getTargetID()->toString())).str());
5930
5931 return false;
5932}
5933
5934bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5935 return Error(Range.Start, "value out of range", Range);
5936}
5937
5938bool AMDGPUAsmParser::calculateGPRBlocks(
5939 const FeatureBitset &Features, const MCExpr *VCCUsed,
5940 const MCExpr *FlatScrUsed, bool XNACKUsed,
5941 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5942 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5943 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5944 // TODO(scott.linder): These calculations are duplicated from
5945 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5946 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5947 MCContext &Ctx = getContext();
5948
5949 const MCExpr *NumSGPRs = NextFreeSGPR;
5950 int64_t EvaluatedSGPRs;
5951
5952 if (Version.Major >= 10)
5953 NumSGPRs = MCConstantExpr::create(0, Ctx);
5954 else {
5955 unsigned MaxAddressableNumSGPRs =
5956 IsaInfo::getAddressableNumSGPRs(&getSTI());
5957
5958 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5959 !Features.test(FeatureSGPRInitBug) &&
5960 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5961 return OutOfRangeError(SGPRRange);
5962
5963 const MCExpr *ExtraSGPRs =
5964 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5965 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5966
5967 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5968 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5969 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5970 return OutOfRangeError(SGPRRange);
5971
5972 if (Features.test(FeatureSGPRInitBug))
5973 NumSGPRs =
5974 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5975 }
5976
5977 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5978 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
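// For example, NumGPR = 43 with an encoding granule of 8 yields
// alignTo(43, 8) / 8 - 1 = 48 / 8 - 1 = 5.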
5979 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5980 unsigned Granule) -> const MCExpr * {
5981 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5982 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5983 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5984 const MCExpr *AlignToGPR =
5985 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5986 const MCExpr *DivGPR =
5987 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5988 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5989 return SubGPR;
5990 };
5991
5992 VGPRBlocks = GetNumGPRBlocks(
5993 NextFreeVGPR,
5994 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5995 SGPRBlocks =
5996 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5997
5998 return false;
5999}
6000
6001bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
6002 if (!getSTI().getTargetTriple().isAMDGCN())
6003 return TokError("directive only supported for amdgcn architecture");
6004
6005 if (!isHsaAbi(getSTI()))
6006 return TokError("directive only supported for amdhsa OS");
6007
6008 StringRef KernelName;
6009 if (getParser().parseIdentifier(KernelName))
6010 return true;
6011
6012 AMDGPU::MCKernelDescriptor KD =
6013 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
6014 &getSTI(), getContext());
6015
6016 StringSet<> Seen;
6017
6018 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
6019
6020 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
6021 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
6022
6023 SMRange VGPRRange;
6024 const MCExpr *NextFreeVGPR = ZeroExpr;
6025 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
6026 const MCExpr *NamedBarCnt = ZeroExpr;
6027 uint64_t SharedVGPRCount = 0;
6028 uint64_t PreloadLength = 0;
6029 uint64_t PreloadOffset = 0;
6030 SMRange SGPRRange;
6031 const MCExpr *NextFreeSGPR = ZeroExpr;
6032
6033 // Count the number of user SGPRs implied from the enabled feature bits.
6034 unsigned ImpliedUserSGPRCount = 0;
6035
6036 // Track if the asm explicitly contains the directive for the user SGPR
6037 // count.
6038 std::optional<unsigned> ExplicitUserSGPRCount;
6039 const MCExpr *ReserveVCC = OneExpr;
6040 const MCExpr *ReserveFlatScr = OneExpr;
6041 std::optional<bool> EnableWavefrontSize32;
6042
6043 while (true) {
6044 while (trySkipToken(AsmToken::EndOfStatement));
6045
6046 StringRef ID;
6047 SMRange IDRange = getTok().getLocRange();
6048 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
6049 return true;
6050
6051 if (ID == ".end_amdhsa_kernel")
6052 break;
6053
6054 if (!Seen.insert(ID).second)
6055 return TokError(".amdhsa_ directives cannot be repeated");
6056
6057 SMLoc ValStart = getLoc();
6058 const MCExpr *ExprVal;
6059 if (getParser().parseExpression(ExprVal))
6060 return true;
6061 SMLoc ValEnd = getLoc();
6062 SMRange ValRange = SMRange(ValStart, ValEnd);
6063
6064 int64_t IVal = 0;
6065 uint64_t Val = IVal;
6066 bool EvaluatableExpr;
6067 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6068 if (IVal < 0)
6069 return OutOfRangeError(ValRange);
6070 Val = IVal;
6071 }
6072
6073#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6074 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6075 return OutOfRangeError(RANGE); \
6076 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6077 getContext());
6078
6079// Some fields use the parsed value immediately, which requires the
6080// expression to be resolvable.
6081#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6082 if (!(RESOLVED)) \
6083 return Error(IDRange.Start, "directive should have resolvable expression", \
6084 IDRange);
6085
6086 if (ID == ".amdhsa_group_segment_fixed_size") {
6087 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
6088 CHAR_BIT>(Val))
6089 return OutOfRangeError(ValRange);
6090 KD.group_segment_fixed_size = ExprVal;
6091 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6092 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
6093 CHAR_BIT>(Val))
6094 return OutOfRangeError(ValRange);
6095 KD.private_segment_fixed_size = ExprVal;
6096 } else if (ID == ".amdhsa_kernarg_size") {
6097 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6098 return OutOfRangeError(ValRange);
6099 KD.kernarg_size = ExprVal;
6100 } else if (ID == ".amdhsa_user_sgpr_count") {
6101 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6102 ExplicitUserSGPRCount = Val;
6103 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6104 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6105 if (hasArchitectedFlatScratch())
6106 return Error(IDRange.Start,
6107 "directive is not supported with architected flat scratch",
6108 IDRange);
6110 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6111 ExprVal, ValRange);
6112 if (Val)
6113 ImpliedUserSGPRCount += 4;
6114 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6115 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6116 if (!hasKernargPreload())
6117 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6118
6119 if (Val > getMaxNumUserSGPRs())
6120 return OutOfRangeError(ValRange);
6121 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6122 ValRange);
6123 if (Val) {
6124 ImpliedUserSGPRCount += Val;
6125 PreloadLength = Val;
6126 }
6127 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6128 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6129 if (!hasKernargPreload())
6130 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6131
6132 if (Val >= 1024)
6133 return OutOfRangeError(ValRange);
6134 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6135 ValRange);
6136 if (Val)
6137 PreloadOffset = Val;
6138 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6139 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6141 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6142 ValRange);
6143 if (Val)
6144 ImpliedUserSGPRCount += 2;
6145 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6146 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6148 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6149 ValRange);
6150 if (Val)
6151 ImpliedUserSGPRCount += 2;
6152 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6153 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6155 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6156 ExprVal, ValRange);
6157 if (Val)
6158 ImpliedUserSGPRCount += 2;
6159 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6160 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6162 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6163 ValRange);
6164 if (Val)
6165 ImpliedUserSGPRCount += 2;
6166 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6167 if (hasArchitectedFlatScratch())
6168 return Error(IDRange.Start,
6169 "directive is not supported with architected flat scratch",
6170 IDRange);
6171 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6173 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6174 ExprVal, ValRange);
6175 if (Val)
6176 ImpliedUserSGPRCount += 2;
6177 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6178 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6180 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6181 ExprVal, ValRange);
6182 if (Val)
6183 ImpliedUserSGPRCount += 1;
6184 } else if (ID == ".amdhsa_wavefront_size32") {
6185 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6186 if (IVersion.Major < 10)
6187 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6188 EnableWavefrontSize32 = Val;
6190 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6191 ValRange);
6192 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6194 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6195 ValRange);
6196 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6197 if (hasArchitectedFlatScratch())
6198 return Error(IDRange.Start,
6199 "directive is not supported with architected flat scratch",
6200 IDRange);
6202 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6203 ValRange);
6204 } else if (ID == ".amdhsa_enable_private_segment") {
6205 if (!hasArchitectedFlatScratch())
6206 return Error(
6207 IDRange.Start,
6208 "directive is not supported without architected flat scratch",
6209 IDRange);
6211 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6212 ValRange);
6213 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6215 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6216 ValRange);
6217 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6219 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6220 ValRange);
6221 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6223 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6224 ValRange);
6225 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6227 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6228 ValRange);
6229 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6231 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6232 ValRange);
6233 } else if (ID == ".amdhsa_next_free_vgpr") {
6234 VGPRRange = ValRange;
6235 NextFreeVGPR = ExprVal;
6236 } else if (ID == ".amdhsa_next_free_sgpr") {
6237 SGPRRange = ValRange;
6238 NextFreeSGPR = ExprVal;
6239 } else if (ID == ".amdhsa_accum_offset") {
6240 if (!isGFX90A())
6241 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6242 AccumOffset = ExprVal;
6243 } else if (ID == ".amdhsa_named_barrier_count") {
6244 if (!isGFX1250())
6245 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6246 NamedBarCnt = ExprVal;
6247 } else if (ID == ".amdhsa_reserve_vcc") {
6248 if (EvaluatableExpr && !isUInt<1>(Val))
6249 return OutOfRangeError(ValRange);
6250 ReserveVCC = ExprVal;
6251 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6252 if (IVersion.Major < 7)
6253 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6254 if (hasArchitectedFlatScratch())
6255 return Error(IDRange.Start,
6256 "directive is not supported with architected flat scratch",
6257 IDRange);
6258 if (EvaluatableExpr && !isUInt<1>(Val))
6259 return OutOfRangeError(ValRange);
6260 ReserveFlatScr = ExprVal;
6261 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6262 if (IVersion.Major < 8)
6263 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6264 if (!isUInt<1>(Val))
6265 return OutOfRangeError(ValRange);
6266 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6267 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6268 IDRange);
6269 } else if (ID == ".amdhsa_float_round_mode_32") {
6271 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6272 ValRange);
6273 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6275 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6276 ValRange);
6277 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6279 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6280 ValRange);
6281 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6283 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6284 ValRange);
6285 } else if (ID == ".amdhsa_dx10_clamp") {
6286 if (IVersion.Major >= 12)
6287 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6289 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6290 ValRange);
6291 } else if (ID == ".amdhsa_ieee_mode") {
6292 if (IVersion.Major >= 12)
6293 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6295 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6296 ValRange);
6297 } else if (ID == ".amdhsa_fp16_overflow") {
6298 if (IVersion.Major < 9)
6299 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6301 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6302 ValRange);
6303 } else if (ID == ".amdhsa_tg_split") {
6304 if (!isGFX90A())
6305 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6306 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6307 ExprVal, ValRange);
6308 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6309 if (!supportsWGP(getSTI()))
6310 return Error(IDRange.Start,
6311 "directive unsupported on " + getSTI().getCPU(), IDRange);
6313 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6314 ValRange);
6315 } else if (ID == ".amdhsa_memory_ordered") {
6316 if (IVersion.Major < 10)
6317 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6319 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6320 ValRange);
6321 } else if (ID == ".amdhsa_forward_progress") {
6322 if (IVersion.Major < 10)
6323 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6325 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6326 ValRange);
6327 } else if (ID == ".amdhsa_shared_vgpr_count") {
6328 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6329 if (IVersion.Major < 10 || IVersion.Major >= 12)
6330 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6331 IDRange);
6332 SharedVGPRCount = Val;
6334 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6335 ValRange);
6336 } else if (ID == ".amdhsa_inst_pref_size") {
6337 if (IVersion.Major < 11)
6338 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6339 if (IVersion.Major == 11) {
6341 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6342 ValRange);
6343 } else {
6345 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6346 ValRange);
6347 }
6348 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6351 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6352 ExprVal, ValRange);
6353 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6355 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6356 ExprVal, ValRange);
6357 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6360 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6361 ExprVal, ValRange);
6362 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6364 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6365 ExprVal, ValRange);
6366 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6368 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6369 ExprVal, ValRange);
6370 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6372 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6373 ExprVal, ValRange);
6374 } else if (ID == ".amdhsa_exception_int_div_zero") {
6376 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6377 ExprVal, ValRange);
6378 } else if (ID == ".amdhsa_round_robin_scheduling") {
6379 if (IVersion.Major < 12)
6380 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6382 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6383 ValRange);
6384 } else {
6385 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6386 }
6387
6388#undef PARSE_BITS_ENTRY
6389 }
6390
6391 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6392 return TokError(".amdhsa_next_free_vgpr directive is required");
6393
6394 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6395 return TokError(".amdhsa_next_free_sgpr directive is required");
6396
6397 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6398
6399 // Consider the case where the total number of UserSGPRs, including
6400 // trailing allocated preload SGPRs, is greater than the number of
6401 // explicitly referenced SGPRs.
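// For example, if 16 user SGPRs are enabled but the kernel only references
// s0..s11, the max below raises NextFreeSGPR from 12 to 16.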
6402 if (PreloadLength) {
6403 MCContext &Ctx = getContext();
6404 NextFreeSGPR = AMDGPUMCExpr::createMax(
6405 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6406 }
6407
6408 const MCExpr *VGPRBlocks;
6409 const MCExpr *SGPRBlocks;
6410 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6411 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6412 EnableWavefrontSize32, NextFreeVGPR,
6413 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6414 SGPRBlocks))
6415 return true;
6416
6417 int64_t EvaluatedVGPRBlocks;
6418 bool VGPRBlocksEvaluatable =
6419 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6420 if (VGPRBlocksEvaluatable &&
6421 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6422 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6423 return OutOfRangeError(VGPRRange);
6424 }
6425 AMDGPU::MCKernelDescriptor::bits_set(
6426 KD.compute_pgm_rsrc1, VGPRBlocks,
6427 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6428 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6429
6430 int64_t EvaluatedSGPRBlocks;
6431 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6432 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6433 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6434 return OutOfRangeError(SGPRRange);
6435 AMDGPU::MCKernelDescriptor::bits_set(
6436 KD.compute_pgm_rsrc1, SGPRBlocks,
6437 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6438 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6439
6440 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6441 return TokError("amdgpu_user_sgpr_count smaller than than implied by "
6442 "enabled user SGPRs");
6443
6444 if (isGFX1250()) {
6446 return TokError("too many user SGPRs enabled");
6449 MCConstantExpr::create(UserSGPRCount, getContext()),
6450 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6451 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6452 } else {
6454 UserSGPRCount))
6455 return TokError("too many user SGPRs enabled");
6458 MCConstantExpr::create(UserSGPRCount, getContext()),
6459 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6460 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6461 }
6462
6463 int64_t IVal = 0;
6464 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6465 return TokError("Kernarg size should be resolvable");
6466 uint64_t kernarg_size = IVal;
6467 if (PreloadLength && kernarg_size &&
6468 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6469 return TokError("Kernarg preload length + offset is larger than the "
6470 "kernarg segment size");
6471
6472 if (isGFX90A()) {
6473 if (!Seen.contains(".amdhsa_accum_offset"))
6474 return TokError(".amdhsa_accum_offset directive is required");
6475 int64_t EvaluatedAccum;
6476 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6477 uint64_t UEvaluatedAccum = EvaluatedAccum;
6478 if (AccumEvaluatable &&
6479 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6480 return TokError("accum_offset should be in range [4..256] in "
6481 "increments of 4");
6482
6483 int64_t EvaluatedNumVGPR;
6484 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6485 AccumEvaluatable &&
6486 UEvaluatedAccum >
6487 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6488 return TokError("accum_offset exceeds total VGPR allocation");
6489 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6491 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6494 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6495 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6496 getContext());
6497 }
6498
6499 if (isGFX1250())
6501 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6502 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6503 getContext());
6504
6505 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6506 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
6507 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6508 return TokError("shared_vgpr_count directive not valid on "
6509 "wavefront size 32");
6510 }
6511
6512 if (VGPRBlocksEvaluatable &&
6513 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6514 63)) {
6515 return TokError("shared_vgpr_count*2 + "
6516 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6517 "exceed 63\n");
6518 }
6519 }
6520
6521 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6522 NextFreeVGPR, NextFreeSGPR,
6523 ReserveVCC, ReserveFlatScr);
6524 return false;
6525}
6526
6527bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6528 uint32_t Version;
6529 if (ParseAsAbsoluteExpression(Version))
6530 return true;
6531
6532 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6533 return false;
6534}
6535
6536bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6537 AMDGPUMCKernelCodeT &C) {
6538 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6539 // assembly for backwards compatibility.
6540 if (ID == "max_scratch_backing_memory_byte_size") {
6541 Parser.eatToEndOfStatement();
6542 return false;
6543 }
6544
6545 SmallString<40> ErrStr;
6546 raw_svector_ostream Err(ErrStr);
6547 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6548 return TokError(Err.str());
6549 }
6550 Lex();
6551
6552 if (ID == "enable_wavefront_size32") {
6553 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6554 if (!isGFX10Plus())
6555 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6556 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6557 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6558 } else {
6559 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6560 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6561 }
6562 }
6563
6564 if (ID == "wavefront_size") {
6565 if (C.wavefront_size == 5) {
6566 if (!isGFX10Plus())
6567 return TokError("wavefront_size=5 is only allowed on GFX10+");
6568 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6569 return TokError("wavefront_size=5 requires +WavefrontSize32");
6570 } else if (C.wavefront_size == 6) {
6571 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6572 return TokError("wavefront_size=6 requires +WavefrontSize64");
6573 }
6574 }
6575
6576 return false;
6577}
6578
6579bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6580 AMDGPUMCKernelCodeT KernelCode;
6581 KernelCode.initDefault(&getSTI(), getContext());
6582
6583 while (true) {
6584 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6585 // will set the current token to EndOfStatement.
6586 while(trySkipToken(AsmToken::EndOfStatement));
6587
6588 StringRef ID;
6589 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6590 return true;
6591
6592 if (ID == ".end_amd_kernel_code_t")
6593 break;
6594
6595 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6596 return true;
6597 }
6598
6599 KernelCode.validate(&getSTI(), getContext());
6600 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6601
6602 return false;
6603}
6604
6605bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6606 StringRef KernelName;
6607 if (!parseId(KernelName, "expected symbol name"))
6608 return true;
6609
6610 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6611 ELF::STT_AMDGPU_HSA_KERNEL);
6612
6613 KernelScope.initialize(getContext());
6614 return false;
6615}
6616
6617bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6618 if (!getSTI().getTargetTriple().isAMDGCN()) {
6619 return Error(getLoc(),
6620 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6621 "architectures");
6622 }
6623
6624 auto TargetIDDirective = getLexer().getTok().getStringContents();
6625 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6626 return Error(getParser().getTok().getLoc(), "target id must match options");
6627
6628 getTargetStreamer().EmitISAVersion();
6629 Lex();
6630
6631 return false;
6632}
6633
6634bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6635 assert(isHsaAbi(getSTI()));
6636
6637 std::string HSAMetadataString;
6638 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6639 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6640 return true;
6641
6642 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6643 return Error(getLoc(), "invalid HSA metadata");
6644
6645 return false;
6646}
6647
6648/// Common code to parse out a block of text (typically YAML) between start and
6649/// end directives.
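/// In this file it is used by ParseDirectiveHSAMetadata and
/// ParseDirectivePALMetadataBegin to collect everything between their
/// begin/end directive pairs.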
6650bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6651 const char *AssemblerDirectiveEnd,
6652 std::string &CollectString) {
6653
6654 raw_string_ostream CollectStream(CollectString);
6655
6656 getLexer().setSkipSpace(false);
6657
6658 bool FoundEnd = false;
6659 while (!isToken(AsmToken::Eof)) {
6660 while (isToken(AsmToken::Space)) {
6661 CollectStream << getTokenStr();
6662 Lex();
6663 }
6664
6665 if (trySkipId(AssemblerDirectiveEnd)) {
6666 FoundEnd = true;
6667 break;
6668 }
6669
6670 CollectStream << Parser.parseStringToEndOfStatement()
6671 << getContext().getAsmInfo()->getSeparatorString();
6672
6673 Parser.eatToEndOfStatement();
6674 }
6675
6676 getLexer().setSkipSpace(true);
6677
6678 if (isToken(AsmToken::Eof) && !FoundEnd) {
6679 return TokError(Twine("expected directive ") +
6680 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6681 }
6682
6683 return false;
6684}
6685
6686/// Parse the assembler directive for new MsgPack-format PAL metadata.
6687bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6688 std::string String;
6689 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6690 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6691 return true;
6692
6693 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6694 if (!PALMetadata->setFromString(String))
6695 return Error(getLoc(), "invalid PAL metadata");
6696 return false;
6697}
6698
6699/// Parse the assembler directive for old linear-format PAL metadata.
6700bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6701 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6702 return Error(getLoc(),
6703 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6704 "not available on non-amdpal OSes")).str());
6705 }
6706
6707 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6708 PALMetadata->setLegacy();
6709 for (;;) {
6710 uint32_t Key, Value;
6711 if (ParseAsAbsoluteExpression(Key)) {
6712 return TokError(Twine("invalid value in ") +
6714 }
6715 if (!trySkipToken(AsmToken::Comma)) {
6716 return TokError(Twine("expected an even number of values in ") +
6718 }
6719 if (ParseAsAbsoluteExpression(Value)) {
6720 return TokError(Twine("invalid value in ") +
6722 }
6723 PALMetadata->setRegister(Key, Value);
6724 if (!trySkipToken(AsmToken::Comma))
6725 break;
6726 }
6727 return false;
6728}
6729
6730/// ParseDirectiveAMDGPULDS
6731/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
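/// For example (symbol name chosen purely for illustration):
///   .amdgpu_lds my_lds_var, 1024, 16
/// reserves 1024 bytes of LDS for my_lds_var with 16-byte alignment.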
6732bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6733 if (getParser().checkForValidSection())
6734 return true;
6735
6736 StringRef Name;
6737 SMLoc NameLoc = getLoc();
6738 if (getParser().parseIdentifier(Name))
6739 return TokError("expected identifier in directive");
6740
6741 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6742 if (getParser().parseComma())
6743 return true;
6744
6745 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6746
6747 int64_t Size;
6748 SMLoc SizeLoc = getLoc();
6749 if (getParser().parseAbsoluteExpression(Size))
6750 return true;
6751 if (Size < 0)
6752 return Error(SizeLoc, "size must be non-negative");
6753 if (Size > LocalMemorySize)
6754 return Error(SizeLoc, "size is too large");
6755
6756 int64_t Alignment = 4;
6757 if (trySkipToken(AsmToken::Comma)) {
6758 SMLoc AlignLoc = getLoc();
6759 if (getParser().parseAbsoluteExpression(Alignment))
6760 return true;
6761 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6762 return Error(AlignLoc, "alignment must be a power of two");
6763
6764 // Alignment larger than the size of LDS is possible in theory, as long
6765 // as the linker manages to place the symbol at address 0, but we do want
6766 // to make sure the alignment fits nicely into a 32-bit integer.
6767 if (Alignment >= 1u << 31)
6768 return Error(AlignLoc, "alignment is too large");
6769 }
6770
6771 if (parseEOL())
6772 return true;
6773
6774 Symbol->redefineIfPossible();
6775 if (!Symbol->isUndefined())
6776 return Error(NameLoc, "invalid symbol redefinition");
6777
6778 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6779 return false;
6780}
6781
6782bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6783 StringRef IDVal = DirectiveID.getString();
6784
6785 if (isHsaAbi(getSTI())) {
6786 if (IDVal == ".amdhsa_kernel")
6787 return ParseDirectiveAMDHSAKernel();
6788
6789 if (IDVal == ".amdhsa_code_object_version")
6790 return ParseDirectiveAMDHSACodeObjectVersion();
6791
6792 // TODO: Restructure/combine with PAL metadata directive.
6793 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6794 return ParseDirectiveHSAMetadata();
6795 } else {
6796 if (IDVal == ".amd_kernel_code_t")
6797 return ParseDirectiveAMDKernelCodeT();
6798
6799 if (IDVal == ".amdgpu_hsa_kernel")
6800 return ParseDirectiveAMDGPUHsaKernel();
6801
6802 if (IDVal == ".amd_amdgpu_isa")
6803 return ParseDirectiveISAVersion();
6804
6805 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6806 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6807 Twine(" directive is "
6808 "not available on non-amdhsa OSes"))
6809 .str());
6810 }
6811 }
6812
6813 if (IDVal == ".amdgcn_target")
6814 return ParseDirectiveAMDGCNTarget();
6815
6816 if (IDVal == ".amdgpu_lds")
6817 return ParseDirectiveAMDGPULDS();
6818
6819 if (IDVal == PALMD::AssemblerDirectiveBegin)
6820 return ParseDirectivePALMetadataBegin();
6821
6822 if (IDVal == PALMD::AssemblerDirective)
6823 return ParseDirectivePALMetadata();
6824
6825 return true;
6826}
6827
6828bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6829 MCRegister Reg) {
6830 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6831 return isGFX9Plus();
6832
6833 // GFX10+ has 2 more SGPRs, 104 and 105.
6834 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6835 return hasSGPR104_SGPR105();
6836
6837 switch (Reg.id()) {
6838 case SRC_SHARED_BASE_LO:
6839 case SRC_SHARED_BASE:
6840 case SRC_SHARED_LIMIT_LO:
6841 case SRC_SHARED_LIMIT:
6842 case SRC_PRIVATE_BASE_LO:
6843 case SRC_PRIVATE_BASE:
6844 case SRC_PRIVATE_LIMIT_LO:
6845 case SRC_PRIVATE_LIMIT:
6846 return isGFX9Plus();
6847 case SRC_FLAT_SCRATCH_BASE_LO:
6848 case SRC_FLAT_SCRATCH_BASE_HI:
6849 return hasGloballyAddressableScratch();
6850 case SRC_POPS_EXITING_WAVE_ID:
6851 return isGFX9Plus() && !isGFX11Plus();
6852 case TBA:
6853 case TBA_LO:
6854 case TBA_HI:
6855 case TMA:
6856 case TMA_LO:
6857 case TMA_HI:
6858 return !isGFX9Plus();
6859 case XNACK_MASK:
6860 case XNACK_MASK_LO:
6861 case XNACK_MASK_HI:
6862 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6863 case SGPR_NULL:
6864 return isGFX10Plus();
6865 case SRC_EXECZ:
6866 case SRC_VCCZ:
6867 return !isGFX11Plus();
6868 default:
6869 break;
6870 }
6871
6872 if (isCI())
6873 return true;
6874
6875 if (isSI() || isGFX10Plus()) {
6876 // No flat_scr on SI.
6877 // On GFX10Plus flat scratch is not a valid register operand and can only be
6878 // accessed with s_setreg/s_getreg.
6879 switch (Reg.id()) {
6880 case FLAT_SCR:
6881 case FLAT_SCR_LO:
6882 case FLAT_SCR_HI:
6883 return false;
6884 default:
6885 return true;
6886 }
6887 }
6888
6889 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6890 // SI/CI have.
6891 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6892 return hasSGPR102_SGPR103();
6893
6894 return true;
6895}
6896
6897ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6898 StringRef Mnemonic,
6899 OperandMode Mode) {
6900 ParseStatus Res = parseVOPD(Operands);
6901 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6902 return Res;
6903
6904 // Try to parse with a custom parser
6905 Res = MatchOperandParserImpl(Operands, Mnemonic);
6906
6907 // If we successfully parsed the operand or if there was an error parsing,
6908 // we are done.
6909 //
6910 // If we are parsing after we reach EndOfStatement then this means we
6911 // are appending default values to the Operands list. This is only done
6912 // by the custom parser, so we shouldn't continue on to the generic parsing.
6913 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6914 return Res;
6915
6916 SMLoc RBraceLoc;
6917 SMLoc LBraceLoc = getLoc();
6918 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6919 unsigned Prefix = Operands.size();
6920
6921 for (;;) {
6922 auto Loc = getLoc();
6923 Res = parseReg(Operands);
6924 if (Res.isNoMatch())
6925 Error(Loc, "expected a register");
6926 if (!Res.isSuccess())
6927 return ParseStatus::Failure;
6928
6929 RBraceLoc = getLoc();
6930 if (trySkipToken(AsmToken::RBrac))
6931 break;
6932
6933 if (!skipToken(AsmToken::Comma,
6934 "expected a comma or a closing square bracket"))
6935 return ParseStatus::Failure;
6936 }
6937
6938 if (Operands.size() - Prefix > 1) {
6939 Operands.insert(Operands.begin() + Prefix,
6940 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6941 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6942 }
6943
6944 return ParseStatus::Success;
6945 }
6946
6947 return parseRegOrImm(Operands);
6948}
6949
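// Strips a trailing encoding suffix (_e64_dpp, _e64, _e32, _dpp, _sdwa) from
// the mnemonic and records the forced encoding it implies.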
6950StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6951 // Clear any forced encodings from the previous instruction.
6952 setForcedEncodingSize(0);
6953 setForcedDPP(false);
6954 setForcedSDWA(false);
6955
6956 if (Name.consume_back("_e64_dpp")) {
6957 setForcedDPP(true);
6958 setForcedEncodingSize(64);
6959 return Name;
6960 }
6961 if (Name.consume_back("_e64")) {
6962 setForcedEncodingSize(64);
6963 return Name;
6964 }
6965 if (Name.consume_back("_e32")) {
6966 setForcedEncodingSize(32);
6967 return Name;
6968 }
6969 if (Name.consume_back("_dpp")) {
6970 setForcedDPP(true);
6971 return Name;
6972 }
6973 if (Name.consume_back("_sdwa")) {
6974 setForcedSDWA(true);
6975 return Name;
6976 }
6977 return Name;
6978}
6979
6980static void applyMnemonicAliases(StringRef &Mnemonic,
6981 const FeatureBitset &Features,
6982 unsigned VariantID);
6983
6984bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6985 StringRef Name, SMLoc NameLoc,
6986 OperandVector &Operands) {
6987 // Add the instruction mnemonic
6988 Name = parseMnemonicSuffix(Name);
6989
6990 // If the target architecture uses MnemonicAlias, call it here to parse
6991 // operands correctly.
6992 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6993
6994 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6995
6996 bool IsMIMG = Name.starts_with("image_");
6997
6998 while (!trySkipToken(AsmToken::EndOfStatement)) {
6999 OperandMode Mode = OperandMode_Default;
7000 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
7001 Mode = OperandMode_NSA;
7002 ParseStatus Res = parseOperand(Operands, Name, Mode);
7003
7004 if (!Res.isSuccess()) {
7005 checkUnsupportedInstruction(Name, NameLoc);
7006 if (!Parser.hasPendingError()) {
7007 // FIXME: use real operand location rather than the current location.
7008 StringRef Msg = Res.isFailure() ? "failed parsing operand."
7009 : "not a valid operand.";
7010 Error(getLoc(), Msg);
7011 }
7012 while (!trySkipToken(AsmToken::EndOfStatement)) {
7013 lex();
7014 }
7015 return true;
7016 }
7017
7018 // Eat the comma or space if there is one.
7019 trySkipToken(AsmToken::Comma);
7020 }
7021
7022 return false;
7023}
7024
7025//===----------------------------------------------------------------------===//
7026// Utility functions
7027//===----------------------------------------------------------------------===//
7028
7029 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
7030 OperandVector &Operands) {
7031 SMLoc S = getLoc();
7032 if (!trySkipId(Name))
7033 return ParseStatus::NoMatch;
7034
7035 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
7036 return ParseStatus::Success;
7037}
7038
7039ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
7040 int64_t &IntVal) {
7041
7042 if (!trySkipId(Prefix, AsmToken::Colon))
7043 return ParseStatus::NoMatch;
7044
7045 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
7046 }
7047
7048ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7049 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7050 std::function<bool(int64_t &)> ConvertResult) {
7051 SMLoc S = getLoc();
7052 int64_t Value = 0;
7053
7054 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
7055 if (!Res.isSuccess())
7056 return Res;
7057
7058 if (ConvertResult && !ConvertResult(Value)) {
7059 Error(S, "invalid " + StringRef(Prefix) + " value.");
7060 }
7061
7062 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
7063 return ParseStatus::Success;
7064}
7065
7066ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7067 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7068 bool (*ConvertResult)(int64_t &)) {
7069 SMLoc S = getLoc();
7070 if (!trySkipId(Prefix, AsmToken::Colon))
7071 return ParseStatus::NoMatch;
7072
7073 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
7074 return ParseStatus::Failure;
7075
7076 unsigned Val = 0;
7077 const unsigned MaxSize = 4;
7078
7079 // FIXME: How to verify the number of elements matches the number of src
7080 // operands?
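// Illustrative example of the accepted syntax: a bracketed list of 0/1 values
// such as neg:[0,1,1] or op_sel:[0,0,1], with at most MaxSize elements.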
7081 for (int I = 0; ; ++I) {
7082 int64_t Op;
7083 SMLoc Loc = getLoc();
7084 if (!parseExpr(Op))
7085 return ParseStatus::Failure;
7086
7087 if (Op != 0 && Op != 1)
7088 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7089
7090 Val |= (Op << I);
7091
7092 if (trySkipToken(AsmToken::RBrac))
7093 break;
7094
7095 if (I + 1 == MaxSize)
7096 return Error(getLoc(), "expected a closing square bracket");
7097
7098 if (!skipToken(AsmToken::Comma, "expected a comma"))
7099 return ParseStatus::Failure;
7100 }
7101
7102 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7103 return ParseStatus::Success;
7104}
7105
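// Parses an optional named-bit modifier, e.g. "tfe" to set the bit or "notfe"
// to clear it (example names; any bit handled here follows the same
// "name"/"no<name>" pattern).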
7106ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7107 OperandVector &Operands,
7108 AMDGPUOperand::ImmTy ImmTy) {
7109 int64_t Bit;
7110 SMLoc S = getLoc();
7111
7112 if (trySkipId(Name)) {
7113 Bit = 1;
7114 } else if (trySkipId("no", Name)) {
7115 Bit = 0;
7116 } else {
7117 return ParseStatus::NoMatch;
7118 }
7119
7120 if (Name == "r128" && !hasMIMG_R128())
7121 return Error(S, "r128 modifier is not supported on this GPU");
7122 if (Name == "a16" && !hasA16())
7123 return Error(S, "a16 modifier is not supported on this GPU");
7124
7125 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7126 ImmTy = AMDGPUOperand::ImmTyR128A16;
7127
7128 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7129 return ParseStatus::Success;
7130}
7131
7132unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7133 bool &Disabling) const {
7134 Disabling = Id.consume_front("no");
7135
7136 if (isGFX940() && !Mnemo.starts_with("s_")) {
7137 return StringSwitch<unsigned>(Id)
7138 .Case("nt", AMDGPU::CPol::NT)
7139 .Case("sc0", AMDGPU::CPol::SC0)
7140 .Case("sc1", AMDGPU::CPol::SC1)
7141 .Default(0);
7142 }
7143
7144 return StringSwitch<unsigned>(Id)
7145 .Case("dlc", AMDGPU::CPol::DLC)
7146 .Case("glc", AMDGPU::CPol::GLC)
7147 .Case("scc", AMDGPU::CPol::SCC)
7148 .Case("slc", AMDGPU::CPol::SLC)
7149 .Default(0);
7150}
7151
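// Cache-policy operands. Illustrative examples of the surface syntax handled
// below: pre-GFX12 modifiers such as "glc slc dlc", and the GFX12+ form
// "th:TH_LOAD_NT scope:SCOPE_SYS" (examples only; availability is validated
// per target).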
7152ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7153 if (isGFX12Plus()) {
7154 SMLoc StringLoc = getLoc();
7155
7156 int64_t CPolVal = 0;
7157 ParseStatus ResTH = ParseStatus::NoMatch;
7158 ParseStatus ResScope = ParseStatus::NoMatch;
7159 ParseStatus ResNV = ParseStatus::NoMatch;
7160 ParseStatus ResScal = ParseStatus::NoMatch;
7161
7162 for (;;) {
7163 if (ResTH.isNoMatch()) {
7164 int64_t TH;
7165 ResTH = parseTH(Operands, TH);
7166 if (ResTH.isFailure())
7167 return ResTH;
7168 if (ResTH.isSuccess()) {
7169 CPolVal |= TH;
7170 continue;
7171 }
7172 }
7173
7174 if (ResScope.isNoMatch()) {
7175 int64_t Scope;
7176 ResScope = parseScope(Operands, Scope);
7177 if (ResScope.isFailure())
7178 return ResScope;
7179 if (ResScope.isSuccess()) {
7180 CPolVal |= Scope;
7181 continue;
7182 }
7183 }
7184
7185 // NV bit exists on GFX12+, but does something starting from GFX1250.
7186 // Allow parsing on all GFX12 and fail on validation for better
7187 // diagnostics.
7188 if (ResNV.isNoMatch()) {
7189 if (trySkipId("nv")) {
7190 ResNV = ParseStatus::Success;
7191 CPolVal |= CPol::NV;
7192 continue;
7193 } else if (trySkipId("no", "nv")) {
7194 ResNV = ParseStatus::Success;
7195 continue;
7196 }
7197 }
7198
7199 if (ResScal.isNoMatch()) {
7200 if (trySkipId("scale_offset")) {
7201 ResScal = ParseStatus::Success;
7202 CPolVal |= CPol::SCAL;
7203 continue;
7204 } else if (trySkipId("no", "scale_offset")) {
7205 ResScal = ParseStatus::Success;
7206 continue;
7207 }
7208 }
7209
7210 break;
7211 }
7212
7213 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7214 ResScal.isNoMatch())
7215 return ParseStatus::NoMatch;
7216
7217 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7218 AMDGPUOperand::ImmTyCPol));
7219 return ParseStatus::Success;
7220 }
7221
7222 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7223 SMLoc OpLoc = getLoc();
7224 unsigned Enabled = 0, Seen = 0;
7225 for (;;) {
7226 SMLoc S = getLoc();
7227 bool Disabling;
7228 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7229 if (!CPol)
7230 break;
7231
7232 lex();
7233
7234 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7235 return Error(S, "dlc modifier is not supported on this GPU");
7236
7237 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7238 return Error(S, "scc modifier is not supported on this GPU");
7239
7240 if (Seen & CPol)
7241 return Error(S, "duplicate cache policy modifier");
7242
7243 if (!Disabling)
7244 Enabled |= CPol;
7245
7246 Seen |= CPol;
7247 }
7248
7249 if (!Seen)
7250 return ParseStatus::NoMatch;
7251
7252 Operands.push_back(
7253 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7254 return ParseStatus::Success;
7255}
7256
7257ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7258 int64_t &Scope) {
7259 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7260 CPol::SCOPE_DEV, CPol::SCOPE_SYS};
7261
7262 ParseStatus Res = parseStringOrIntWithPrefix(
7263 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7264 Scope);
7265
7266 if (Res.isSuccess())
7267 Scope = Scopes[Scope];
7268
7269 return Res;
7270}
7271
7272ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7273 TH = AMDGPU::CPol::TH_RT; // default
7274
7275 StringRef Value;
7276 SMLoc StringLoc;
7277 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7278 if (!Res.isSuccess())
7279 return Res;
7280
7281 if (Value == "TH_DEFAULT")
7282 TH = AMDGPU::CPol::TH_RT;
7283 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7284 Value == "TH_LOAD_NT_WB") {
7285 return Error(StringLoc, "invalid th value");
7286 } else if (Value.consume_front("TH_ATOMIC_")) {
7287 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
7288 } else if (Value.consume_front("TH_LOAD_")) {
7289 TH = AMDGPU::CPol::TH_TYPE_LOAD;
7290 } else if (Value.consume_front("TH_STORE_")) {
7291 TH = AMDGPU::CPol::TH_TYPE_STORE;
7292 } else {
7293 return Error(StringLoc, "invalid th value");
7294 }
7295
7296 if (Value == "BYPASS")
7297 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
7298
7299 if (TH != 0) {
7300 if (TH == AMDGPU::CPol::TH_TYPE_ATOMIC)
7301 TH |= StringSwitch<int64_t>(Value)
7302 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7303 .Case("RT", AMDGPU::CPol::TH_RT)
7304 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7305 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7306 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7307 AMDGPU::CPol::TH_ATOMIC_RETURN)
7308 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7309 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7310 AMDGPU::CPol::TH_ATOMIC_NT)
7311 .Default(0xffffffff);
7312 else
7313 TH |= StringSwitch<int64_t>(Value)
7314 .Case("RT", AMDGPU::CPol::TH_RT)
7315 .Case("NT", AMDGPU::CPol::TH_NT)
7316 .Case("HT", AMDGPU::CPol::TH_HT)
7317 .Case("LU", AMDGPU::CPol::TH_LU)
7318 .Case("WB", AMDGPU::CPol::TH_WB)
7319 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7320 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7321 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7322 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7323 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7324 .Default(0xffffffff);
7325 }
7326
7327 if (TH == 0xffffffff)
7328 return Error(StringLoc, "invalid th value");
7329
7330 return ParseStatus::Success;
7331}
7332
7333static void
7334 addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
7335 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7336 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7337 std::optional<unsigned> InsertAt = std::nullopt) {
7338 auto i = OptionalIdx.find(ImmT);
7339 if (i != OptionalIdx.end()) {
7340 unsigned Idx = i->second;
7341 const AMDGPUOperand &Op =
7342 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7343 if (InsertAt)
7344 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7345 else
7346 Op.addImmOperands(Inst, 1);
7347 } else {
7348 if (InsertAt.has_value())
7349 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7350 else
7351 Inst.addOperand(MCOperand::createImm(Default));
7352 }
7353}
7354
7355ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7356 StringRef &Value,
7357 SMLoc &StringLoc) {
7358 if (!trySkipId(Prefix, AsmToken::Colon))
7359 return ParseStatus::NoMatch;
7360
7361 StringLoc = getLoc();
7362 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7363 : ParseStatus::Failure;
7364}
7365
7366ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7367 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7368 int64_t &IntVal) {
7369 if (!trySkipId(Name, AsmToken::Colon))
7370 return ParseStatus::NoMatch;
7371
7372 SMLoc StringLoc = getLoc();
7373
7374 StringRef Value;
7375 if (isToken(AsmToken::Identifier)) {
7376 Value = getTokenStr();
7377 lex();
7378
7379 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7380 if (Value == Ids[IntVal])
7381 break;
7382 } else if (!parseExpr(IntVal))
7383 return ParseStatus::Failure;
7384
7385 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7386 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7387
7388 return ParseStatus::Success;
7389}
7390
7391ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7392 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7393 AMDGPUOperand::ImmTy Type) {
7394 SMLoc S = getLoc();
7395 int64_t IntVal;
7396
7397 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7398 if (Res.isSuccess())
7399 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7400
7401 return Res;
7402}
7403
7404//===----------------------------------------------------------------------===//
7405// MTBUF format
7406//===----------------------------------------------------------------------===//
7407
7408bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7409 int64_t MaxVal,
7410 int64_t &Fmt) {
7411 int64_t Val;
7412 SMLoc Loc = getLoc();
7413
7414 auto Res = parseIntWithPrefix(Pref, Val);
7415 if (Res.isFailure())
7416 return false;
7417 if (Res.isNoMatch())
7418 return true;
7419
7420 if (Val < 0 || Val > MaxVal) {
7421 Error(Loc, Twine("out of range ", StringRef(Pref)));
7422 return false;
7423 }
7424
7425 Fmt = Val;
7426 return true;
7427}
7428
7429ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7430 AMDGPUOperand::ImmTy ImmTy) {
7431 const char *Pref = "index_key";
7432 int64_t ImmVal = 0;
7433 SMLoc Loc = getLoc();
7434 auto Res = parseIntWithPrefix(Pref, ImmVal);
7435 if (!Res.isSuccess())
7436 return Res;
7437
7438 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7439 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7440 (ImmVal < 0 || ImmVal > 1))
7441 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7442
7443 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7444 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7445
7446 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7447 return ParseStatus::Success;
7448}
7449
7450ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7451 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7452}
7453
7454ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7455 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7456}
7457
7458ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7459 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7460}
7461
7462ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7463 StringRef Name,
7464 AMDGPUOperand::ImmTy Type) {
7465 return parseStringOrIntWithPrefix(Operands, Name,
7466 {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
7467 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
7468 "MATRIX_FMT_FP4"},
7469 Type);
7470}
7471
7472ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7473 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7474 AMDGPUOperand::ImmTyMatrixAFMT);
7475}
7476
7477ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7478 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7479 AMDGPUOperand::ImmTyMatrixBFMT);
7480}
7481
7482ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7483 StringRef Name,
7484 AMDGPUOperand::ImmTy Type) {
7485 return parseStringOrIntWithPrefix(
7486 Operands, Name, {"MATRIX_SCALE_ROW0", "MATRIX_SCALE_ROW1"}, Type);
7487}
7488
7489ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7490 return tryParseMatrixScale(Operands, "matrix_a_scale",
7491 AMDGPUOperand::ImmTyMatrixAScale);
7492}
7493
7494ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7495 return tryParseMatrixScale(Operands, "matrix_b_scale",
7496 AMDGPUOperand::ImmTyMatrixBScale);
7497}
7498
7499ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7500 StringRef Name,
7501 AMDGPUOperand::ImmTy Type) {
7502 return parseStringOrIntWithPrefix(
7503 Operands, Name,
7504 {"MATRIX_SCALE_FMT_E8", "MATRIX_SCALE_FMT_E5M3", "MATRIX_SCALE_FMT_E4M3"},
7505 Type);
7506}
7507
7508ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7509 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7510 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7511}
7512
7513ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7514 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7515 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7516}
7517
7518// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7519// values to live in a joint format operand in the MCInst encoding.
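// Illustrative example: "format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]"
// (split dfmt/nfmt form) versus the GFX10+ unified "format:[BUF_FMT_32_FLOAT]".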
7520ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7521 using namespace llvm::AMDGPU::MTBUFFormat;
7522
7523 int64_t Dfmt = DFMT_UNDEF;
7524 int64_t Nfmt = NFMT_UNDEF;
7525
7526 // dfmt and nfmt can appear in either order, and each is optional.
7527 for (int I = 0; I < 2; ++I) {
7528 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7529 return ParseStatus::Failure;
7530
7531 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7532 return ParseStatus::Failure;
7533
7534 // Skip optional comma between dfmt/nfmt
7535 // but guard against 2 commas following each other.
7536 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7537 !peekToken().is(AsmToken::Comma)) {
7538 trySkipToken(AsmToken::Comma);
7539 }
7540 }
7541
7542 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7543 return ParseStatus::NoMatch;
7544
7545 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7546 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7547
7548 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7549 return ParseStatus::Success;
7550}
7551
7552ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7553 using namespace llvm::AMDGPU::MTBUFFormat;
7554
7555 int64_t Fmt = UFMT_UNDEF;
7556
7557 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7558 return ParseStatus::Failure;
7559
7560 if (Fmt == UFMT_UNDEF)
7561 return ParseStatus::NoMatch;
7562
7563 Format = Fmt;
7564 return ParseStatus::Success;
7565}
7566
7567bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7568 int64_t &Nfmt,
7569 StringRef FormatStr,
7570 SMLoc Loc) {
7571 using namespace llvm::AMDGPU::MTBUFFormat;
7572 int64_t Format;
7573
7574 Format = getDfmt(FormatStr);
7575 if (Format != DFMT_UNDEF) {
7576 Dfmt = Format;
7577 return true;
7578 }
7579
7580 Format = getNfmt(FormatStr, getSTI());
7581 if (Format != NFMT_UNDEF) {
7582 Nfmt = Format;
7583 return true;
7584 }
7585
7586 Error(Loc, "unsupported format");
7587 return false;
7588}
7589
7590ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7591 SMLoc FormatLoc,
7592 int64_t &Format) {
7593 using namespace llvm::AMDGPU::MTBUFFormat;
7594
7595 int64_t Dfmt = DFMT_UNDEF;
7596 int64_t Nfmt = NFMT_UNDEF;
7597 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7598 return ParseStatus::Failure;
7599
7600 if (trySkipToken(AsmToken::Comma)) {
7601 StringRef Str;
7602 SMLoc Loc = getLoc();
7603 if (!parseId(Str, "expected a format string") ||
7604 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7605 return ParseStatus::Failure;
7606 if (Dfmt == DFMT_UNDEF)
7607 return Error(Loc, "duplicate numeric format");
7608 if (Nfmt == NFMT_UNDEF)
7609 return Error(Loc, "duplicate data format");
7610 }
7611
7612 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7613 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7614
7615 if (isGFX10Plus()) {
7616 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7617 if (Ufmt == UFMT_UNDEF)
7618 return Error(FormatLoc, "unsupported format");
7619 Format = Ufmt;
7620 } else {
7621 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7622 }
7623
7624 return ParseStatus::Success;
7625}
7626
7627ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7628 SMLoc Loc,
7629 int64_t &Format) {
7630 using namespace llvm::AMDGPU::MTBUFFormat;
7631
7632 auto Id = getUnifiedFormat(FormatStr, getSTI());
7633 if (Id == UFMT_UNDEF)
7634 return ParseStatus::NoMatch;
7635
7636 if (!isGFX10Plus())
7637 return Error(Loc, "unified format is not supported on this GPU");
7638
7639 Format = Id;
7640 return ParseStatus::Success;
7641}
7642
7643ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7644 using namespace llvm::AMDGPU::MTBUFFormat;
7645 SMLoc Loc = getLoc();
7646
7647 if (!parseExpr(Format))
7648 return ParseStatus::Failure;
7649 if (!isValidFormatEncoding(Format, getSTI()))
7650 return Error(Loc, "out of range format");
7651
7652 return ParseStatus::Success;
7653}
7654
7655ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7656 using namespace llvm::AMDGPU::MTBUFFormat;
7657
7658 if (!trySkipId("format", AsmToken::Colon))
7659 return ParseStatus::NoMatch;
7660
7661 if (trySkipToken(AsmToken::LBrac)) {
7662 StringRef FormatStr;
7663 SMLoc Loc = getLoc();
7664 if (!parseId(FormatStr, "expected a format string"))
7665 return ParseStatus::Failure;
7666
7667 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7668 if (Res.isNoMatch())
7669 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7670 if (!Res.isSuccess())
7671 return Res;
7672
7673 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7674 return ParseStatus::Failure;
7675
7676 return ParseStatus::Success;
7677 }
7678
7679 return parseNumericFormat(Format);
7680}
7681
7682ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7683 using namespace llvm::AMDGPU::MTBUFFormat;
7684
7685 int64_t Format = getDefaultFormatEncoding(getSTI());
7686 ParseStatus Res;
7687 SMLoc Loc = getLoc();
7688
7689 // Parse legacy format syntax.
7690 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7691 if (Res.isFailure())
7692 return Res;
7693
7694 bool FormatFound = Res.isSuccess();
7695
7696 Operands.push_back(
7697 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7698
7699 if (FormatFound)
7700 trySkipToken(AsmToken::Comma);
7701
7702 if (isToken(AsmToken::EndOfStatement)) {
7703 // We are expecting an soffset operand,
7704 // but let matcher handle the error.
7705 return ParseStatus::Success;
7706 }
7707
7708 // Parse soffset.
7709 Res = parseRegOrImm(Operands);
7710 if (!Res.isSuccess())
7711 return Res;
7712
7713 trySkipToken(AsmToken::Comma);
7714
7715 if (!FormatFound) {
7716 Res = parseSymbolicOrNumericFormat(Format);
7717 if (Res.isFailure())
7718 return Res;
7719 if (Res.isSuccess()) {
7720 auto Size = Operands.size();
7721 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7722 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7723 Op.setImm(Format);
7724 }
7725 return ParseStatus::Success;
7726 }
7727
7728 if (isId("format") && peekToken().is(AsmToken::Colon))
7729 return Error(getLoc(), "duplicate format");
7730 return ParseStatus::Success;
7731}
7732
7733ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7734 ParseStatus Res =
7735 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7736 if (Res.isNoMatch()) {
7737 Res = parseIntWithPrefix("inst_offset", Operands,
7738 AMDGPUOperand::ImmTyInstOffset);
7739 }
7740 return Res;
7741}
7742
7743ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7744 ParseStatus Res =
7745 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7746 if (Res.isNoMatch())
7747 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7748 return Res;
7749}
7750
7751ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7752 ParseStatus Res =
7753 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7754 if (Res.isNoMatch()) {
7755 Res =
7756 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7757 }
7758 return Res;
7759}
7760
7761//===----------------------------------------------------------------------===//
7762// Exp
7763//===----------------------------------------------------------------------===//
7764
7765void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7766 OptionalImmIndexMap OptionalIdx;
7767
7768 unsigned OperandIdx[4];
7769 unsigned EnMask = 0;
7770 int SrcIdx = 0;
7771
7772 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7773 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7774
7775 // Add the register arguments
7776 if (Op.isReg()) {
7777 assert(SrcIdx < 4);
7778 OperandIdx[SrcIdx] = Inst.size();
7779 Op.addRegOperands(Inst, 1);
7780 ++SrcIdx;
7781 continue;
7782 }
7783
7784 if (Op.isOff()) {
7785 assert(SrcIdx < 4);
7786 OperandIdx[SrcIdx] = Inst.size();
7787 Inst.addOperand(MCOperand::createReg(MCRegister()));
7788 ++SrcIdx;
7789 continue;
7790 }
7791
7792 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7793 Op.addImmOperands(Inst, 1);
7794 continue;
7795 }
7796
7797 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7798 continue;
7799
7800 // Handle optional arguments
7801 OptionalIdx[Op.getImmTy()] = i;
7802 }
7803
7804 assert(SrcIdx == 4);
7805
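// Note: with the "compr" modifier the export carries packed data, so the
// register operands and the enable mask computed below differ from the
// uncompressed case (pairs of components are enabled per register).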
7806 bool Compr = false;
7807 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7808 Compr = true;
7809 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7810 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7811 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7812 }
7813
7814 for (auto i = 0; i < SrcIdx; ++i) {
7815 if (Inst.getOperand(OperandIdx[i]).getReg()) {
7816 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7817 }
7818 }
7819
7820 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7821 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7822
7823 Inst.addOperand(MCOperand::createImm(EnMask));
7824}
7825
7826//===----------------------------------------------------------------------===//
7827// s_waitcnt
7828//===----------------------------------------------------------------------===//
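// Illustrative example of the syntax handled in this section:
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
// Counter names with a "_sat" suffix clamp an out-of-range value instead of
// reporting an error (see parseCnt below).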
7829
7830static bool
7831encodeCnt(
7832 const AMDGPU::IsaVersion ISA,
7833 int64_t &IntVal,
7834 int64_t CntVal,
7835 bool Saturate,
7836 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7837 unsigned (*decode)(const IsaVersion &Version, unsigned))
7838{
7839 bool Failed = false;
7840
7841 IntVal = encode(ISA, IntVal, CntVal);
7842 if (CntVal != decode(ISA, IntVal)) {
7843 if (Saturate) {
7844 IntVal = encode(ISA, IntVal, -1);
7845 } else {
7846 Failed = true;
7847 }
7848 }
7849 return Failed;
7850}
7851
7852bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7853
7854 SMLoc CntLoc = getLoc();
7855 StringRef CntName = getTokenStr();
7856
7857 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7858 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7859 return false;
7860
7861 int64_t CntVal;
7862 SMLoc ValLoc = getLoc();
7863 if (!parseExpr(CntVal))
7864 return false;
7865
7866 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7867
7868 bool Failed = true;
7869 bool Sat = CntName.ends_with("_sat");
7870
7871 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7872 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7873 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7874 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7875 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7876 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7877 } else {
7878 Error(CntLoc, "invalid counter name " + CntName);
7879 return false;
7880 }
7881
7882 if (Failed) {
7883 Error(ValLoc, "too large value for " + CntName);
7884 return false;
7885 }
7886
7887 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7888 return false;
7889
7890 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7891 if (isToken(AsmToken::EndOfStatement)) {
7892 Error(getLoc(), "expected a counter name");
7893 return false;
7894 }
7895 }
7896
7897 return true;
7898}
7899
7900ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7901 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7902 int64_t Waitcnt = getWaitcntBitMask(ISA);
7903 SMLoc S = getLoc();
7904
7905 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7906 while (!isToken(AsmToken::EndOfStatement)) {
7907 if (!parseCnt(Waitcnt))
7908 return ParseStatus::Failure;
7909 }
7910 } else {
7911 if (!parseExpr(Waitcnt))
7912 return ParseStatus::Failure;
7913 }
7914
7915 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7916 return ParseStatus::Success;
7917}
7918
7919bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7920 SMLoc FieldLoc = getLoc();
7921 StringRef FieldName = getTokenStr();
7922 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7923 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7924 return false;
7925
7926 SMLoc ValueLoc = getLoc();
7927 StringRef ValueName = getTokenStr();
7928 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7929 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7930 return false;
7931
7932 unsigned Shift;
7933 if (FieldName == "instid0") {
7934 Shift = 0;
7935 } else if (FieldName == "instskip") {
7936 Shift = 4;
7937 } else if (FieldName == "instid1") {
7938 Shift = 7;
7939 } else {
7940 Error(FieldLoc, "invalid field name " + FieldName);
7941 return false;
7942 }
7943
7944 int Value;
7945 if (Shift == 4) {
7946 // Parse values for instskip.
7947 Value = StringSwitch<int>(ValueName)
7948 .Case("SAME", 0)
7949 .Case("NEXT", 1)
7950 .Case("SKIP_1", 2)
7951 .Case("SKIP_2", 3)
7952 .Case("SKIP_3", 4)
7953 .Case("SKIP_4", 5)
7954 .Default(-1);
7955 } else {
7956 // Parse values for instid0 and instid1.
7957 Value = StringSwitch<int>(ValueName)
7958 .Case("NO_DEP", 0)
7959 .Case("VALU_DEP_1", 1)
7960 .Case("VALU_DEP_2", 2)
7961 .Case("VALU_DEP_3", 3)
7962 .Case("VALU_DEP_4", 4)
7963 .Case("TRANS32_DEP_1", 5)
7964 .Case("TRANS32_DEP_2", 6)
7965 .Case("TRANS32_DEP_3", 7)
7966 .Case("FMA_ACCUM_CYCLE_1", 8)
7967 .Case("SALU_CYCLE_1", 9)
7968 .Case("SALU_CYCLE_2", 10)
7969 .Case("SALU_CYCLE_3", 11)
7970 .Default(-1);
7971 }
7972 if (Value < 0) {
7973 Error(ValueLoc, "invalid value name " + ValueName);
7974 return false;
7975 }
7976
7977 Delay |= Value << Shift;
7978 return true;
7979}
7980
7981ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7982 int64_t Delay = 0;
7983 SMLoc S = getLoc();
7984
7985 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7986 do {
7987 if (!parseDelay(Delay))
7988 return ParseStatus::Failure;
7989 } while (trySkipToken(AsmToken::Pipe));
7990 } else {
7991 if (!parseExpr(Delay))
7992 return ParseStatus::Failure;
7993 }
7994
7995 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7996 return ParseStatus::Success;
7997}
7998
7999bool
8000AMDGPUOperand::isSWaitCnt() const {
8001 return isImm();
8002}
8003
8004bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
8005
8006//===----------------------------------------------------------------------===//
8007// DepCtr
8008//===----------------------------------------------------------------------===//
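// Illustrative example of the syntax handled in this section:
//   s_waitcnt_depctr depctr_va_vdst(0) & depctr_vm_vsrc(0)
// (counter names are target-dependent; see encodeDepCtr).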
8009
8010void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
8011 StringRef DepCtrName) {
8012 switch (ErrorId) {
8013 case OPR_ID_UNKNOWN:
8014 Error(Loc, Twine("invalid counter name ", DepCtrName));
8015 return;
8016 case OPR_ID_UNSUPPORTED:
8017 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
8018 return;
8019 case OPR_ID_DUPLICATE:
8020 Error(Loc, Twine("duplicate counter name ", DepCtrName));
8021 return;
8022 case OPR_VAL_INVALID:
8023 Error(Loc, Twine("invalid value for ", DepCtrName));
8024 return;
8025 default:
8026 assert(false);
8027 }
8028}
8029
8030bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
8031
8032 using namespace llvm::AMDGPU::DepCtr;
8033
8034 SMLoc DepCtrLoc = getLoc();
8035 StringRef DepCtrName = getTokenStr();
8036
8037 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
8038 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8039 return false;
8040
8041 int64_t ExprVal;
8042 if (!parseExpr(ExprVal))
8043 return false;
8044
8045 unsigned PrevOprMask = UsedOprMask;
8046 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
8047
8048 if (CntVal < 0) {
8049 depCtrError(DepCtrLoc, CntVal, DepCtrName);
8050 return false;
8051 }
8052
8053 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8054 return false;
8055
8056 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
8057 if (isToken(AsmToken::EndOfStatement)) {
8058 Error(getLoc(), "expected a counter name");
8059 return false;
8060 }
8061 }
8062
8063 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8064 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8065 return true;
8066}
8067
8068ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8069 using namespace llvm::AMDGPU::DepCtr;
8070
8071 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
8072 SMLoc Loc = getLoc();
8073
8074 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8075 unsigned UsedOprMask = 0;
8076 while (!isToken(AsmToken::EndOfStatement)) {
8077 if (!parseDepCtr(DepCtr, UsedOprMask))
8078 return ParseStatus::Failure;
8079 }
8080 } else {
8081 if (!parseExpr(DepCtr))
8082 return ParseStatus::Failure;
8083 }
8084
8085 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8086 return ParseStatus::Success;
8087}
8088
8089bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8090
8091//===----------------------------------------------------------------------===//
8092// hwreg
8093//===----------------------------------------------------------------------===//
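// Illustrative example of the syntax handled in this section:
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)
// A structured form such as {id: 1, offset: 0, size: 32} and a plain 16-bit
// immediate are also accepted (see parseHwreg below).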
8094
8095ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8096 OperandInfoTy &Offset,
8097 OperandInfoTy &Width) {
8098 using namespace llvm::AMDGPU::Hwreg;
8099
8100 if (!trySkipId("hwreg", AsmToken::LParen))
8101 return ParseStatus::NoMatch;
8102
8103 // The register may be specified by name or using a numeric code
8104 HwReg.Loc = getLoc();
8105 if (isToken(AsmToken::Identifier) &&
8106 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8107 HwReg.IsSymbolic = true;
8108 lex(); // skip register name
8109 } else if (!parseExpr(HwReg.Val, "a register name")) {
8110 return ParseStatus::Failure;
8111 }
8112
8113 if (trySkipToken(AsmToken::RParen))
8114 return ParseStatus::Success;
8115
8116 // parse optional params
8117 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8118 return ParseStatus::Failure;
8119
8120 Offset.Loc = getLoc();
8121 if (!parseExpr(Offset.Val))
8122 return ParseStatus::Failure;
8123
8124 if (!skipToken(AsmToken::Comma, "expected a comma"))
8125 return ParseStatus::Failure;
8126
8127 Width.Loc = getLoc();
8128 if (!parseExpr(Width.Val) ||
8129 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8130 return ParseStatus::Failure;
8131
8132 return ParseStatus::Success;
8133}
8134
8135ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8136 using namespace llvm::AMDGPU::Hwreg;
8137
8138 int64_t ImmVal = 0;
8139 SMLoc Loc = getLoc();
8140
8141 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8142 HwregId::Default);
8143 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8144 HwregOffset::Default);
8145 struct : StructuredOpField {
8146 using StructuredOpField::StructuredOpField;
8147 bool validate(AMDGPUAsmParser &Parser) const override {
8148 if (!isUIntN(Width, Val - 1))
8149 return Error(Parser, "only values from 1 to 32 are legal");
8150 return true;
8151 }
8152 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8153 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8154
8155 if (Res.isNoMatch())
8156 Res = parseHwregFunc(HwReg, Offset, Width);
8157
8158 if (Res.isSuccess()) {
8159 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8160 return ParseStatus::Failure;
8161 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8162 }
8163
8164 if (Res.isNoMatch() &&
8165 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8166 Res = ParseStatus::Success;
8167
8168 if (!Res.isSuccess())
8169 return ParseStatus::Failure;
8170
8171 if (!isUInt<16>(ImmVal))
8172 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8173 Operands.push_back(
8174 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8175 return ParseStatus::Success;
8176}
8177
8178bool AMDGPUOperand::isHwreg() const {
8179 return isImmTy(ImmTyHwreg);
8180}
8181
8182//===----------------------------------------------------------------------===//
8183// sendmsg
8184//===----------------------------------------------------------------------===//
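// Illustrative example of the syntax handled in this section:
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// A plain 16-bit immediate is also accepted.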
8185
8186bool
8187AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8188 OperandInfoTy &Op,
8189 OperandInfoTy &Stream) {
8190 using namespace llvm::AMDGPU::SendMsg;
8191
8192 Msg.Loc = getLoc();
8193 if (isToken(AsmToken::Identifier) &&
8194 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8195 Msg.IsSymbolic = true;
8196 lex(); // skip message name
8197 } else if (!parseExpr(Msg.Val, "a message name")) {
8198 return false;
8199 }
8200
8201 if (trySkipToken(AsmToken::Comma)) {
8202 Op.IsDefined = true;
8203 Op.Loc = getLoc();
8204 if (isToken(AsmToken::Identifier) &&
8205 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8206 OPR_ID_UNKNOWN) {
8207 lex(); // skip operation name
8208 } else if (!parseExpr(Op.Val, "an operation name")) {
8209 return false;
8210 }
8211
8212 if (trySkipToken(AsmToken::Comma)) {
8213 Stream.IsDefined = true;
8214 Stream.Loc = getLoc();
8215 if (!parseExpr(Stream.Val))
8216 return false;
8217 }
8218 }
8219
8220 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8221}
8222
8223bool
8224AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8225 const OperandInfoTy &Op,
8226 const OperandInfoTy &Stream) {
8227 using namespace llvm::AMDGPU::SendMsg;
8228
8229 // Validation strictness depends on whether message is specified
8230 // in a symbolic or in a numeric form. In the latter case
8231 // only encoding possibility is checked.
8232 bool Strict = Msg.IsSymbolic;
8233
8234 if (Strict) {
8235 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8236 Error(Msg.Loc, "specified message id is not supported on this GPU");
8237 return false;
8238 }
8239 } else {
8240 if (!isValidMsgId(Msg.Val, getSTI())) {
8241 Error(Msg.Loc, "invalid message id");
8242 return false;
8243 }
8244 }
8245 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8246 if (Op.IsDefined) {
8247 Error(Op.Loc, "message does not support operations");
8248 } else {
8249 Error(Msg.Loc, "missing message operation");
8250 }
8251 return false;
8252 }
8253 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8254 if (Op.Val == OPR_ID_UNSUPPORTED)
8255 Error(Op.Loc, "specified operation id is not supported on this GPU");
8256 else
8257 Error(Op.Loc, "invalid operation id");
8258 return false;
8259 }
8260 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8261 Stream.IsDefined) {
8262 Error(Stream.Loc, "message operation does not support streams");
8263 return false;
8264 }
8265 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8266 Error(Stream.Loc, "invalid message stream id");
8267 return false;
8268 }
8269 return true;
8270}
8271
8272ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8273 using namespace llvm::AMDGPU::SendMsg;
8274
8275 int64_t ImmVal = 0;
8276 SMLoc Loc = getLoc();
8277
8278 if (trySkipId("sendmsg", AsmToken::LParen)) {
8279 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8280 OperandInfoTy Op(OP_NONE_);
8281 OperandInfoTy Stream(STREAM_ID_NONE_);
8282 if (parseSendMsgBody(Msg, Op, Stream) &&
8283 validateSendMsg(Msg, Op, Stream)) {
8284 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8285 } else {
8286 return ParseStatus::Failure;
8287 }
8288 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8289 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8290 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8291 } else {
8292 return ParseStatus::Failure;
8293 }
8294
8295 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8296 return ParseStatus::Success;
8297}
8298
8299bool AMDGPUOperand::isSendMsg() const {
8300 return isImmTy(ImmTySendMsg);
8301}
8302
8303//===----------------------------------------------------------------------===//
8304// v_interp
8305//===----------------------------------------------------------------------===//
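// Illustrative example of the syntax handled in this section:
//   v_interp_p1_f32 v0, v1, attr0.x
// (slot names p10/p20/p0 and attribute channels .x/.y/.z/.w, per the parsers
// below).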
8306
8307ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8308 StringRef Str;
8309 SMLoc S = getLoc();
8310
8311 if (!parseId(Str))
8312 return ParseStatus::NoMatch;
8313
8314 int Slot = StringSwitch<int>(Str)
8315 .Case("p10", 0)
8316 .Case("p20", 1)
8317 .Case("p0", 2)
8318 .Default(-1);
8319
8320 if (Slot == -1)
8321 return Error(S, "invalid interpolation slot");
8322
8323 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8324 AMDGPUOperand::ImmTyInterpSlot));
8325 return ParseStatus::Success;
8326}
8327
8328ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8329 StringRef Str;
8330 SMLoc S = getLoc();
8331
8332 if (!parseId(Str))
8333 return ParseStatus::NoMatch;
8334
8335 if (!Str.starts_with("attr"))
8336 return Error(S, "invalid interpolation attribute");
8337
8338 StringRef Chan = Str.take_back(2);
8339 int AttrChan = StringSwitch<int>(Chan)
8340 .Case(".x", 0)
8341 .Case(".y", 1)
8342 .Case(".z", 2)
8343 .Case(".w", 3)
8344 .Default(-1);
8345 if (AttrChan == -1)
8346 return Error(S, "invalid or missing interpolation attribute channel");
8347
8348 Str = Str.drop_back(2).drop_front(4);
8349
8350 uint8_t Attr;
8351 if (Str.getAsInteger(10, Attr))
8352 return Error(S, "invalid or missing interpolation attribute number");
8353
8354 if (Attr > 32)
8355 return Error(S, "out of bounds interpolation attribute number");
8356
8357 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8358
8359 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8360 AMDGPUOperand::ImmTyInterpAttr));
8361 Operands.push_back(AMDGPUOperand::CreateImm(
8362 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8363 return ParseStatus::Success;
8364}
8365
8366//===----------------------------------------------------------------------===//
8367// exp
8368//===----------------------------------------------------------------------===//
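// Illustrative example of the syntax handled in this section:
//   exp mrt0 v0, v1, v2, v3 done vm
// where the target may also be, e.g., mrtz, null, pos0..3 or param0..31.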
8369
8370ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8371 using namespace llvm::AMDGPU::Exp;
8372
8373 StringRef Str;
8374 SMLoc S = getLoc();
8375
8376 if (!parseId(Str))
8377 return ParseStatus::NoMatch;
8378
8379 unsigned Id = getTgtId(Str);
8380 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8381 return Error(S, (Id == ET_INVALID)
8382 ? "invalid exp target"
8383 : "exp target is not supported on this GPU");
8384
8385 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8386 AMDGPUOperand::ImmTyExpTgt));
8387 return ParseStatus::Success;
8388}
8389
8390//===----------------------------------------------------------------------===//
8391// parser helpers
8392//===----------------------------------------------------------------------===//
8393
8394bool
8395AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8396 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8397}
8398
8399bool
8400AMDGPUAsmParser::isId(const StringRef Id) const {
8401 return isId(getToken(), Id);
8402}
8403
8404bool
8405AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8406 return getTokenKind() == Kind;
8407}
8408
8409StringRef AMDGPUAsmParser::getId() const {
8410 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8411}
8412
8413bool
8414AMDGPUAsmParser::trySkipId(const StringRef Id) {
8415 if (isId(Id)) {
8416 lex();
8417 return true;
8418 }
8419 return false;
8420}
8421
8422bool
8423AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8424 if (isToken(AsmToken::Identifier)) {
8425 StringRef Tok = getTokenStr();
8426 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8427 lex();
8428 return true;
8429 }
8430 }
8431 return false;
8432}
8433
8434bool
8435AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8436 if (isId(Id) && peekToken().is(Kind)) {
8437 lex();
8438 lex();
8439 return true;
8440 }
8441 return false;
8442}
8443
8444bool
8445AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8446 if (isToken(Kind)) {
8447 lex();
8448 return true;
8449 }
8450 return false;
8451}
8452
8453bool
8454AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8455 const StringRef ErrMsg) {
8456 if (!trySkipToken(Kind)) {
8457 Error(getLoc(), ErrMsg);
8458 return false;
8459 }
8460 return true;
8461}
8462
8463bool
8464AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8465 SMLoc S = getLoc();
8466
8467 const MCExpr *Expr;
8468 if (Parser.parseExpression(Expr))
8469 return false;
8470
8471 if (Expr->evaluateAsAbsolute(Imm))
8472 return true;
8473
8474 if (Expected.empty()) {
8475 Error(S, "expected absolute expression");
8476 } else {
8477 Error(S, Twine("expected ", Expected) +
8478 Twine(" or an absolute expression"));
8479 }
8480 return false;
8481}
8482
8483bool
8484AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8485 SMLoc S = getLoc();
8486
8487 const MCExpr *Expr;
8488 if (Parser.parseExpression(Expr))
8489 return false;
8490
8491 int64_t IntVal;
8492 if (Expr->evaluateAsAbsolute(IntVal)) {
8493 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8494 } else {
8495 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8496 }
8497 return true;
8498}
8499
8500bool
8501AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8502 if (isToken(AsmToken::String)) {
8503 Val = getToken().getStringContents();
8504 lex();
8505 return true;
8506 }
8507 Error(getLoc(), ErrMsg);
8508 return false;
8509}
8510
8511bool
8512AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8513 if (isToken(AsmToken::Identifier)) {
8514 Val = getTokenStr();
8515 lex();
8516 return true;
8517 }
8518 if (!ErrMsg.empty())
8519 Error(getLoc(), ErrMsg);
8520 return false;
8521}
8522
8523AsmToken
8524AMDGPUAsmParser::getToken() const {
8525 return Parser.getTok();
8526}
8527
8528AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8529 return isToken(AsmToken::EndOfStatement)
8530 ? getToken()
8531 : getLexer().peekTok(ShouldSkipSpace);
8532}
8533
8534void
8535AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8536 auto TokCount = getLexer().peekTokens(Tokens);
8537
8538 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8539 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8540}
8541
8542AsmToken::TokenKind
8543AMDGPUAsmParser::getTokenKind() const {
8544 return getLexer().getKind();
8545}
8546
8547SMLoc
8548AMDGPUAsmParser::getLoc() const {
8549 return getToken().getLoc();
8550}
8551
8552StringRef
8553AMDGPUAsmParser::getTokenStr() const {
8554 return getToken().getString();
8555}
8556
8557void
8558AMDGPUAsmParser::lex() {
8559 Parser.Lex();
8560}
8561
8562SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8563 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8564}
8565
8566SMLoc
8567AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8568 const OperandVector &Operands) const {
8569 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8570 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8571 if (Test(Op))
8572 return Op.getStartLoc();
8573 }
8574 return getInstLoc(Operands);
8575}
8576
8577SMLoc
8578AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8579 const OperandVector &Operands) const {
8580 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8581 return getOperandLoc(Test, Operands);
8582}
8583
8584SMLoc AMDGPUAsmParser::getRegLoc(MCRegister Reg,
8585 const OperandVector &Operands) const {
8586 auto Test = [=](const AMDGPUOperand& Op) {
8587 return Op.isRegKind() && Op.getReg() == Reg;
8588 };
8589 return getOperandLoc(Test, Operands);
8590}
8591
8592SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
8593 bool SearchMandatoryLiterals) const {
8594 auto Test = [](const AMDGPUOperand& Op) {
8595 return Op.IsImmKindLiteral() || Op.isExpr();
8596 };
8597 SMLoc Loc = getOperandLoc(Test, Operands);
8598 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
8599 Loc = getMandatoryLitLoc(Operands);
8600 return Loc;
8601}
8602
8603SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
8604 auto Test = [](const AMDGPUOperand &Op) {
8605 return Op.IsImmKindMandatoryLiteral();
8606 };
8607 return getOperandLoc(Test, Operands);
8608}
8609
8610SMLoc
8611AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
8612 auto Test = [](const AMDGPUOperand& Op) {
8613 return Op.isImmKindConst();
8614 };
8615 return getOperandLoc(Test, Operands);
8616}
8617
8618ParseStatus
8619AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8620 if (!trySkipToken(AsmToken::LCurly))
8621 return ParseStatus::NoMatch;
8622
8623 bool First = true;
8624 while (!trySkipToken(AsmToken::RCurly)) {
8625 if (!First &&
8626 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8627 return ParseStatus::Failure;
8628
8629 StringRef Id = getTokenStr();
8630 SMLoc IdLoc = getLoc();
8631 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8632 !skipToken(AsmToken::Colon, "colon expected"))
8633 return ParseStatus::Failure;
8634
8635 const auto *I =
8636 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8637 if (I == Fields.end())
8638 return Error(IdLoc, "unknown field");
8639 if ((*I)->IsDefined)
8640 return Error(IdLoc, "duplicate field");
8641
8642 // TODO: Support symbolic values.
8643 (*I)->Loc = getLoc();
8644 if (!parseExpr((*I)->Val))
8645 return ParseStatus::Failure;
8646 (*I)->IsDefined = true;
8647
8648 First = false;
8649 }
8650 return ParseStatus::Success;
8651}
8652
8653bool AMDGPUAsmParser::validateStructuredOpFields(
8654 ArrayRef<const StructuredOpField *> Fields) {
8655 return all_of(Fields, [this](const StructuredOpField *F) {
8656 return F->validate(*this);
8657 });
8658}
8659
8660//===----------------------------------------------------------------------===//
8661// swizzle
8662//===----------------------------------------------------------------------===//
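// Illustrative examples of the syntax handled in this section:
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v5, v1 offset:swizzle(BITMASK_PERM, "01pip")
// A plain 16-bit offset is also accepted (see parseSwizzle below).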
8663
8665static unsigned
8666encodeBitmaskPerm(const unsigned AndMask,
8667 const unsigned OrMask,
8668 const unsigned XorMask) {
8669 using namespace llvm::AMDGPU::Swizzle;
8670
8671 return BITMASK_PERM_ENC |
8672 (AndMask << BITMASK_AND_SHIFT) |
8673 (OrMask << BITMASK_OR_SHIFT) |
8674 (XorMask << BITMASK_XOR_SHIFT);
8675}
8676
8677bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8678 const unsigned MaxVal,
8679 const Twine &ErrMsg, SMLoc &Loc) {
8680 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8681 return false;
8682 }
8683 Loc = getLoc();
8684 if (!parseExpr(Op)) {
8685 return false;
8686 }
8687 if (Op < MinVal || Op > MaxVal) {
8688 Error(Loc, ErrMsg);
8689 return false;
8690 }
8691
8692 return true;
8693}
8694
8695bool
8696AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8697 const unsigned MinVal,
8698 const unsigned MaxVal,
8699 const StringRef ErrMsg) {
8700 SMLoc Loc;
8701 for (unsigned i = 0; i < OpNum; ++i) {
8702 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8703 return false;
8704 }
8705
8706 return true;
8707}
8708
8709bool
8710AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8711 using namespace llvm::AMDGPU::Swizzle;
8712
8713 int64_t Lane[LANE_NUM];
8714 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8715 "expected a 2-bit lane id")) {
8716 Imm = QUAD_PERM_ENC;
8717 for (unsigned I = 0; I < LANE_NUM; ++I) {
8718 Imm |= Lane[I] << (LANE_SHIFT * I);
8719 }
8720 return true;
8721 }
8722 return false;
8723}
8724
8725bool
8726AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8727 using namespace llvm::AMDGPU::Swizzle;
8728
8729 SMLoc Loc;
8730 int64_t GroupSize;
8731 int64_t LaneIdx;
8732
8733 if (!parseSwizzleOperand(GroupSize,
8734 2, 32,
8735 "group size must be in the interval [2,32]",
8736 Loc)) {
8737 return false;
8738 }
8739 if (!isPowerOf2_64(GroupSize)) {
8740 Error(Loc, "group size must be a power of two");
8741 return false;
8742 }
8743 if (parseSwizzleOperand(LaneIdx,
8744 0, GroupSize - 1,
8745 "lane id must be in the interval [0,group size - 1]",
8746 Loc)) {
8747 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8748 return true;
8749 }
8750 return false;
8751}
8752
8753bool
8754AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8755 using namespace llvm::AMDGPU::Swizzle;
8756
8757 SMLoc Loc;
8758 int64_t GroupSize;
8759
8760 if (!parseSwizzleOperand(GroupSize,
8761 2, 32,
8762 "group size must be in the interval [2,32]",
8763 Loc)) {
8764 return false;
8765 }
8766 if (!isPowerOf2_64(GroupSize)) {
8767 Error(Loc, "group size must be a power of two");
8768 return false;
8769 }
8770
8771 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8772 return true;
8773}
8774
8775bool
8776AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8777 using namespace llvm::AMDGPU::Swizzle;
8778
8779 SMLoc Loc;
8780 int64_t GroupSize;
8781
8782 if (!parseSwizzleOperand(GroupSize,
8783 1, 16,
8784 "group size must be in the interval [1,16]",
8785 Loc)) {
8786 return false;
8787 }
8788 if (!isPowerOf2_64(GroupSize)) {
8789 Error(Loc, "group size must be a power of two");
8790 return false;
8791 }
8792
8793 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8794 return true;
8795}
8796
8797bool
8798AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8799 using namespace llvm::AMDGPU::Swizzle;
8800
8801 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8802 return false;
8803 }
8804
8805 StringRef Ctl;
8806 SMLoc StrLoc = getLoc();
8807 if (!parseString(Ctl)) {
8808 return false;
8809 }
8810 if (Ctl.size() != BITMASK_WIDTH) {
8811 Error(StrLoc, "expected a 5-character mask");
8812 return false;
8813 }
8814
8815 unsigned AndMask = 0;
8816 unsigned OrMask = 0;
8817 unsigned XorMask = 0;
8818
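// Each of the 5 mask characters controls one lane-id bit (most significant
// first): '0' forces the bit to 0, '1' forces it to 1, 'p' preserves it and
// 'i' inverts it; this is folded into the and/or/xor masks below.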
8819 for (size_t i = 0; i < Ctl.size(); ++i) {
8820 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8821 switch(Ctl[i]) {
8822 default:
8823 Error(StrLoc, "invalid mask");
8824 return false;
8825 case '0':
8826 break;
8827 case '1':
8828 OrMask |= Mask;
8829 break;
8830 case 'p':
8831 AndMask |= Mask;
8832 break;
8833 case 'i':
8834 AndMask |= Mask;
8835 XorMask |= Mask;
8836 break;
8837 }
8838 }
8839
8840 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8841 return true;
8842}
8843
8844bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8845 using namespace llvm::AMDGPU::Swizzle;
8846
8847 if (!AMDGPU::isGFX9Plus(getSTI())) {
8848 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8849 return false;
8850 }
8851
8852 int64_t Swizzle;
8853 SMLoc Loc;
8854 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8855 "FFT swizzle must be in the interval [0," +
8856 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8857 Loc))
8858 return false;
8859
8860 Imm = FFT_MODE_ENC | Swizzle;
8861 return true;
8862}
8863
8864bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8865 using namespace llvm::AMDGPU::Swizzle;
8866
8867 if (!AMDGPU::isGFX9Plus(getSTI())) {
8868 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8869 return false;
8870 }
8871
8872 SMLoc Loc;
8873 int64_t Direction;
8874
8875 if (!parseSwizzleOperand(Direction, 0, 1,
8876 "direction must be 0 (left) or 1 (right)", Loc))
8877 return false;
8878
8879 int64_t RotateSize;
8880 if (!parseSwizzleOperand(
8881 RotateSize, 0, ROTATE_MAX_SIZE,
8882 "number of threads to rotate must be in the interval [0," +
8883 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8884 Loc))
8885 return false;
8886
8887 Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
8888 (RotateSize << ROTATE_SIZE_SHIFT);
8889 return true;
8890}
8891
8892bool
8893AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8894
8895 SMLoc OffsetLoc = getLoc();
8896
8897 if (!parseExpr(Imm, "a swizzle macro")) {
8898 return false;
8899 }
8900 if (!isUInt<16>(Imm)) {
8901 Error(OffsetLoc, "expected a 16-bit offset");
8902 return false;
8903 }
8904 return true;
8905}
8906
8907bool
8908AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8909 using namespace llvm::AMDGPU::Swizzle;
8910
8911 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
8912
8913 SMLoc ModeLoc = getLoc();
8914 bool Ok = false;
8915
8916 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8917 Ok = parseSwizzleQuadPerm(Imm);
8918 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8919 Ok = parseSwizzleBitmaskPerm(Imm);
8920 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8921 Ok = parseSwizzleBroadcast(Imm);
8922 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8923 Ok = parseSwizzleSwap(Imm);
8924 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8925 Ok = parseSwizzleReverse(Imm);
8926 } else if (trySkipId(IdSymbolic[ID_FFT])) {
8927 Ok = parseSwizzleFFT(Imm);
8928 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8929 Ok = parseSwizzleRotate(Imm);
8930 } else {
8931 Error(ModeLoc, "expected a swizzle mode");
8932 }
8933
8934 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
8935 }
8936
8937 return false;
8938}
8939
8940ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8941 SMLoc S = getLoc();
8942 int64_t Imm = 0;
8943
8944 if (trySkipId("offset")) {
8945
8946 bool Ok = false;
8947 if (skipToken(AsmToken::Colon, "expected a colon")) {
8948 if (trySkipId("swizzle")) {
8949 Ok = parseSwizzleMacro(Imm);
8950 } else {
8951 Ok = parseSwizzleOffset(Imm);
8952 }
8953 }
8954
8955 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8956
8957 return Ok ? ParseStatus::Success : ParseStatus::Failure;
8958 }
8959 return ParseStatus::NoMatch;
8960}
8961
8962bool
8963AMDGPUOperand::isSwizzle() const {
8964 return isImmTy(ImmTySwizzle);
8965}
8966
8967//===----------------------------------------------------------------------===//
8968// VGPR Index Mode
8969//===----------------------------------------------------------------------===//
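// Illustrative example of the syntax handled in this section:
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
// A plain 4-bit immediate is also accepted.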
8970
8971int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8972
8973 using namespace llvm::AMDGPU::VGPRIndexMode;
8974
8975 if (trySkipToken(AsmToken::RParen)) {
8976 return OFF;
8977 }
8978
8979 int64_t Imm = 0;
8980
8981 while (true) {
8982 unsigned Mode = 0;
8983 SMLoc S = getLoc();
8984
8985 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8986 if (trySkipId(IdSymbolic[ModeId])) {
8987 Mode = 1 << ModeId;
8988 break;
8989 }
8990 }
8991
8992 if (Mode == 0) {
8993 Error(S, (Imm == 0)?
8994 "expected a VGPR index mode or a closing parenthesis" :
8995 "expected a VGPR index mode");
8996 return UNDEF;
8997 }
8998
8999 if (Imm & Mode) {
9000 Error(S, "duplicate VGPR index mode");
9001 return UNDEF;
9002 }
9003 Imm |= Mode;
9004
9005 if (trySkipToken(AsmToken::RParen))
9006 break;
9007 if (!skipToken(AsmToken::Comma,
9008 "expected a comma or a closing parenthesis"))
9009 return UNDEF;
9010 }
9011
9012 return Imm;
9013}
9014
9015ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
9016
9017 using namespace llvm::AMDGPU::VGPRIndexMode;
9018
9019 int64_t Imm = 0;
9020 SMLoc S = getLoc();
9021
9022 if (trySkipId("gpr_idx", AsmToken::LParen)) {
9023 Imm = parseGPRIdxMacro();
9024 if (Imm == UNDEF)
9025 return ParseStatus::Failure;
9026 } else {
9027 if (getParser().parseAbsoluteExpression(Imm))
9028 return ParseStatus::Failure;
9029 if (Imm < 0 || !isUInt<4>(Imm))
9030 return Error(S, "invalid immediate: only 4-bit values are legal");
9031 }
9032
9033 Operands.push_back(
9034 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
9035 return ParseStatus::Success;
9036}
9037
9038bool AMDGPUOperand::isGPRIdxMode() const {
9039 return isImmTy(ImmTyGprIdxMode);
9040}
9041
9042//===----------------------------------------------------------------------===//
9043// sopp branch targets
9044//===----------------------------------------------------------------------===//
9045
9046ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
9047
9048 // Make sure we are not parsing something
9049 // that looks like a label or an expression but is not.
9050 // This will improve error messages.
9051 if (isRegister() || isModifier())
9052 return ParseStatus::NoMatch;
9053
9054 if (!parseExpr(Operands))
9055 return ParseStatus::Failure;
9056
9057 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
9058 assert(Opr.isImm() || Opr.isExpr());
9059 SMLoc Loc = Opr.getStartLoc();
9060
9061 // Currently we do not support arbitrary expressions as branch targets.
9062 // Only labels and absolute expressions are accepted.
9063 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9064 Error(Loc, "expected an absolute expression or a label");
9065 } else if (Opr.isImm() && !Opr.isS16Imm()) {
9066 Error(Loc, "expected a 16-bit signed jump offset");
9067 }
9068
9069 return ParseStatus::Success;
9070}
9071
9072//===----------------------------------------------------------------------===//
9073// Boolean holding registers
9074//===----------------------------------------------------------------------===//
9075
9076ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
9077 return parseReg(Operands);
9078}
9079
9080//===----------------------------------------------------------------------===//
9081// mubuf
9082//===----------------------------------------------------------------------===//
9083
9084void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9085 const OperandVector &Operands,
9086 bool IsAtomic) {
9087 OptionalImmIndexMap OptionalIdx;
9088 unsigned FirstOperandIdx = 1;
9089 bool IsAtomicReturn = false;
9090
9091 if (IsAtomic) {
9092 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
 9093 SIInstrFlags::IsAtomicRet;
 9094 }
9095
9096 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9097 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9098
9099 // Add the register arguments
9100 if (Op.isReg()) {
9101 Op.addRegOperands(Inst, 1);
9102 // Insert a tied src for atomic return dst.
9103 // This cannot be postponed as subsequent calls to
9104 // addImmOperands rely on correct number of MC operands.
9105 if (IsAtomicReturn && i == FirstOperandIdx)
9106 Op.addRegOperands(Inst, 1);
9107 continue;
9108 }
9109
9110 // Handle the case where soffset is an immediate
9111 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9112 Op.addImmOperands(Inst, 1);
9113 continue;
9114 }
9115
9116 // Handle tokens like 'offen' which are sometimes hard-coded into the
9117 // asm string. There are no MCInst operands for these.
9118 if (Op.isToken()) {
9119 continue;
9120 }
9121 assert(Op.isImm());
9122
9123 // Handle optional arguments
9124 OptionalIdx[Op.getImmTy()] = i;
9125 }
9126
9127 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9128 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9129}
9130
9131//===----------------------------------------------------------------------===//
9132// smrd
9133//===----------------------------------------------------------------------===//
9134
9135bool AMDGPUOperand::isSMRDOffset8() const {
9136 return isImmLiteral() && isUInt<8>(getImm());
9137}
9138
9139bool AMDGPUOperand::isSMEMOffset() const {
9140 // Offset range is checked later by validator.
9141 return isImmLiteral();
9142}
9143
9144bool AMDGPUOperand::isSMRDLiteralOffset() const {
9145 // 32-bit literals are only supported on CI and we only want to use them
9146 // when the offset is > 8-bits.
9147 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9148}
9149
9150//===----------------------------------------------------------------------===//
9151// vop3
9152//===----------------------------------------------------------------------===//
9153
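// Output modifier (omod) hardware encoding: 0 = none, 1 = *2, 2 = *4, 3 = /2.
// ConvertOmodMul maps mul:1/2/4 to 0/1/2; ConvertOmodDiv maps div:1/2 to 0/3.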
9154static bool ConvertOmodMul(int64_t &Mul) {
9155 if (Mul != 1 && Mul != 2 && Mul != 4)
9156 return false;
9157
9158 Mul >>= 1;
9159 return true;
9160}
9161
9162static bool ConvertOmodDiv(int64_t &Div) {
9163 if (Div == 1) {
9164 Div = 0;
9165 return true;
9166 }
9167
9168 if (Div == 2) {
9169 Div = 3;
9170 return true;
9171 }
9172
9173 return false;
9174}
9175
9176// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9177// This is intentional and ensures compatibility with sp3.
9178// See bug 35397 for details.
9179bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9180 if (BoundCtrl == 0 || BoundCtrl == 1) {
9181 if (!isGFX11Plus())
9182 BoundCtrl = 1;
9183 return true;
9184 }
9185 return false;
9186}
9187
9188void AMDGPUAsmParser::onBeginOfFile() {
9189 if (!getParser().getStreamer().getTargetStreamer() ||
9190 getSTI().getTargetTriple().getArch() == Triple::r600)
9191 return;
9192
9193 if (!getTargetStreamer().getTargetID())
9194 getTargetStreamer().initializeTargetID(getSTI(),
9195 getSTI().getFeatureString());
9196
9197 if (isHsaAbi(getSTI()))
9198 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9199}
9200
9201/// Parse AMDGPU specific expressions.
9202///
9203/// expr ::= or(expr, ...) |
9204/// max(expr, ...)
9205///
9206bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9207 using AGVK = AMDGPUMCExpr::VariantKind;
9208
9209 if (isToken(AsmToken::Identifier)) {
9210 StringRef TokenId = getTokenStr();
9211 AGVK VK = StringSwitch<AGVK>(TokenId)
9212 .Case("max", AGVK::AGVK_Max)
9213 .Case("or", AGVK::AGVK_Or)
9214 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9215 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9216 .Case("alignto", AGVK::AGVK_AlignTo)
9217 .Case("occupancy", AGVK::AGVK_Occupancy)
9218 .Default(AGVK::AGVK_None);
9219
9220 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
 9221 SmallVector<const MCExpr *, 4> Exprs;
 9222 uint64_t CommaCount = 0;
9223 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9224 lex(); // Eat '('
9225 while (true) {
9226 if (trySkipToken(AsmToken::RParen)) {
9227 if (Exprs.empty()) {
9228 Error(getToken().getLoc(),
9229 "empty " + Twine(TokenId) + " expression");
9230 return true;
9231 }
9232 if (CommaCount + 1 != Exprs.size()) {
9233 Error(getToken().getLoc(),
9234 "mismatch of commas in " + Twine(TokenId) + " expression");
9235 return true;
9236 }
9237 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9238 return false;
9239 }
9240 const MCExpr *Expr;
9241 if (getParser().parseExpression(Expr, EndLoc))
9242 return true;
9243 Exprs.push_back(Expr);
9244 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9245 if (LastTokenWasComma)
9246 CommaCount++;
9247 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9248 Error(getToken().getLoc(),
9249 "unexpected token in " + Twine(TokenId) + " expression");
9250 return true;
9251 }
9252 }
9253 }
9254 }
9255 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9256}
9257
9258ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9259 StringRef Name = getTokenStr();
9260 if (Name == "mul") {
9261 return parseIntWithPrefix("mul", Operands,
9262 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9263 }
9264
9265 if (Name == "div") {
9266 return parseIntWithPrefix("div", Operands,
9267 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9268 }
9269
9270 return ParseStatus::NoMatch;
9271}
9272
9273// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9274// the number of src operands present, then copies that bit into src0_modifiers.
9275static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9276 int Opc = Inst.getOpcode();
9277 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9278 if (OpSelIdx == -1)
9279 return;
9280
9281 int SrcNum;
9282 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9283 AMDGPU::OpName::src2};
9284 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9285 ++SrcNum)
9286 ;
9287 assert(SrcNum > 0);
9288
9289 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9290
9291 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9292 if (DstIdx == -1)
9293 return;
9294
9295 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9296 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9297 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9298 if (DstOp.isReg() &&
9299 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
 9300 if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
 9301 ModVal |= SISrcMods::DST_OP_SEL;
9302 } else {
9303 if ((OpSel & (1 << SrcNum)) != 0)
9304 ModVal |= SISrcMods::DST_OP_SEL;
9305 }
9306 Inst.getOperand(ModIdx).setImm(ModVal);
9307}
9308
9309void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9310 const OperandVector &Operands) {
9311 cvtVOP3P(Inst, Operands);
9312 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9313}
9314
9315void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9316 OptionalImmIndexMap &OptionalIdx) {
9317 cvtVOP3P(Inst, Operands, OptionalIdx);
9318 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9319}
9320
9321static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9322 return
9323 // 1. This operand is input modifiers
9324 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9325 // 2. This is not last operand
9326 && Desc.NumOperands > (OpNum + 1)
9327 // 3. Next operand is register class
9328 && Desc.operands()[OpNum + 1].RegClass != -1
9329 // 4. Next register is not tied to any other operand
9330 && Desc.getOperandConstraint(OpNum + 1,
 9331 MCOI::TIED_TO) == -1;
 9332}
9333
9334void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9335{
9336 OptionalImmIndexMap OptionalIdx;
9337 unsigned Opc = Inst.getOpcode();
9338
9339 unsigned I = 1;
9340 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9341 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9342 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9343 }
9344
9345 for (unsigned E = Operands.size(); I != E; ++I) {
9346 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
 9347 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
 9348 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9349 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9350 Op.isInterpAttrChan()) {
9351 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9352 } else if (Op.isImmModifier()) {
9353 OptionalIdx[Op.getImmTy()] = I;
9354 } else {
9355 llvm_unreachable("unhandled operand type");
9356 }
9357 }
9358
9359 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9360 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9361 AMDGPUOperand::ImmTyHigh);
9362
9363 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9364 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9365 AMDGPUOperand::ImmTyClamp);
9366
9367 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9368 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9369 AMDGPUOperand::ImmTyOModSI);
9370}
9371
9372void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9373{
9374 OptionalImmIndexMap OptionalIdx;
9375 unsigned Opc = Inst.getOpcode();
9376
9377 unsigned I = 1;
9378 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9379 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9380 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9381 }
9382
9383 for (unsigned E = Operands.size(); I != E; ++I) {
9384 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
 9385 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
 9386 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9387 } else if (Op.isImmModifier()) {
9388 OptionalIdx[Op.getImmTy()] = I;
9389 } else {
9390 llvm_unreachable("unhandled operand type");
9391 }
9392 }
9393
9394 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9395
9396 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9397 if (OpSelIdx != -1)
9398 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9399
9400 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9401
9402 if (OpSelIdx == -1)
9403 return;
9404
9405 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9406 AMDGPU::OpName::src2};
9407 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9408 AMDGPU::OpName::src1_modifiers,
9409 AMDGPU::OpName::src2_modifiers};
9410
9411 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9412
9413 for (int J = 0; J < 3; ++J) {
9414 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9415 if (OpIdx == -1)
9416 break;
9417
9418 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9419 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9420
9421 if ((OpSel & (1 << J)) != 0)
9422 ModVal |= SISrcMods::OP_SEL_0;
9423 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
9424 (OpSel & (1 << 3)) != 0)
9425 ModVal |= SISrcMods::DST_OP_SEL;
9426
9427 Inst.getOperand(ModIdx).setImm(ModVal);
9428 }
9429}
9430void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9431 const OperandVector &Operands) {
9432 OptionalImmIndexMap OptionalIdx;
9433 unsigned Opc = Inst.getOpcode();
9434 unsigned I = 1;
9435 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9436
9437 const MCInstrDesc &Desc = MII.get(Opc);
9438
9439 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9440 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9441
9442 for (unsigned E = Operands.size(); I != E; ++I) {
9443 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9444 int NumOperands = Inst.getNumOperands();
9445 // The order of operands in MCInst and parsed operands are different.
9446 // Adding dummy cbsz and blgp operands at corresponding MCInst operand
9447 // indices for parsing scale values correctly.
9448 if (NumOperands == CbszOpIdx) {
 9449 Inst.addOperand(MCOperand::createImm(0));
 9450 Inst.addOperand(MCOperand::createImm(0));
 9451 }
9452 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9453 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9454 } else if (Op.isImmModifier()) {
9455 OptionalIdx[Op.getImmTy()] = I;
9456 } else {
9457 Op.addRegOrImmOperands(Inst, 1);
9458 }
9459 }
9460
9461 // Insert CBSZ and BLGP operands for F8F6F4 variants
9462 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9463 if (CbszIdx != OptionalIdx.end()) {
9464 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9465 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9466 }
9467
9468 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9469 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9470 if (BlgpIdx != OptionalIdx.end()) {
9471 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9472 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9473 }
9474
9475 // Add dummy src_modifiers
 9476 Inst.addOperand(MCOperand::createImm(0));
 9477 Inst.addOperand(MCOperand::createImm(0));
 9478
9479 // Handle op_sel fields
9480
9481 unsigned OpSel = 0;
9482 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9483 if (OpselIdx != OptionalIdx.end()) {
9484 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9485 .getImm();
9486 }
9487
9488 unsigned OpSelHi = 0;
9489 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9490 if (OpselHiIdx != OptionalIdx.end()) {
9491 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9492 .getImm();
9493 }
9494 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9495 AMDGPU::OpName::src1_modifiers};
9496
9497 for (unsigned J = 0; J < 2; ++J) {
9498 unsigned ModVal = 0;
9499 if (OpSel & (1 << J))
9500 ModVal |= SISrcMods::OP_SEL_0;
9501 if (OpSelHi & (1 << J))
9502 ModVal |= SISrcMods::OP_SEL_1;
9503
9504 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9505 Inst.getOperand(ModIdx).setImm(ModVal);
9506 }
9507}
9508
9509void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9510 OptionalImmIndexMap &OptionalIdx) {
9511 unsigned Opc = Inst.getOpcode();
9512
9513 unsigned I = 1;
9514 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9515 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9516 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9517 }
9518
9519 for (unsigned E = Operands.size(); I != E; ++I) {
9520 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
 9521 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
 9522 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9523 } else if (Op.isImmModifier()) {
9524 OptionalIdx[Op.getImmTy()] = I;
9525 } else {
9526 Op.addRegOrImmOperands(Inst, 1);
9527 }
9528 }
9529
9530 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9531 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9532 AMDGPUOperand::ImmTyScaleSel);
9533
9534 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9535 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9536 AMDGPUOperand::ImmTyClamp);
9537
9538 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9539 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9540 Inst.addOperand(Inst.getOperand(0));
9541 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9542 AMDGPUOperand::ImmTyByteSel);
9543 }
9544
9545 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9546 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9547 AMDGPUOperand::ImmTyOModSI);
9548
9549 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9550 // it has src2 register operand that is tied to dst operand
9551 // we don't allow modifiers for this operand in assembler so src2_modifiers
9552 // should be 0.
9553 if (isMAC(Opc)) {
9554 auto *it = Inst.begin();
9555 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9556 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9557 ++it;
9558 // Copy the operand to ensure it's not invalidated when Inst grows.
9559 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9560 }
9561}
9562
9563void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9564 OptionalImmIndexMap OptionalIdx;
9565 cvtVOP3(Inst, Operands, OptionalIdx);
9566}
9567
9568void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9569 OptionalImmIndexMap &OptIdx) {
9570 const int Opc = Inst.getOpcode();
9571 const MCInstrDesc &Desc = MII.get(Opc);
9572
9573 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9574
9575 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9576 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9577 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9578 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9579 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9580 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9581 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9582 Inst.addOperand(Inst.getOperand(0));
9583 }
9584
9585 // Adding vdst_in operand is already covered for these DPP instructions in
9586 // cvtVOP3DPP.
9587 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
9588 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
9589 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
9590 Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
9591 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
9592 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
9593 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
9594 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
9595 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
9596 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
9597 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9598 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
9599 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
9600 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9601 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9602 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
9603 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
9604 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
9605 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
9606 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
9607 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
9608 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
9609 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
9610 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
9611 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
9612 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
9613 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
9614 Inst.addOperand(Inst.getOperand(0));
9615 }
9616
9617 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9618 if (BitOp3Idx != -1) {
9619 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9620 }
9621
9622 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9623 // instruction, and then figure out where to actually put the modifiers
9624
9625 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9626 if (OpSelIdx != -1) {
9627 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9628 }
9629
9630 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9631 if (OpSelHiIdx != -1) {
9632 int DefaultVal = IsPacked ? -1 : 0;
9633 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9634 DefaultVal);
9635 }
9636
9637 int MatrixAFMTIdx =
9638 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9639 if (MatrixAFMTIdx != -1) {
9640 addOptionalImmOperand(Inst, Operands, OptIdx,
9641 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9642 }
9643
9644 int MatrixBFMTIdx =
9645 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9646 if (MatrixBFMTIdx != -1) {
9647 addOptionalImmOperand(Inst, Operands, OptIdx,
9648 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9649 }
9650
9651 int MatrixAScaleIdx =
9652 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9653 if (MatrixAScaleIdx != -1) {
9654 addOptionalImmOperand(Inst, Operands, OptIdx,
9655 AMDGPUOperand::ImmTyMatrixAScale, 0);
9656 }
9657
9658 int MatrixBScaleIdx =
9659 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9660 if (MatrixBScaleIdx != -1) {
9661 addOptionalImmOperand(Inst, Operands, OptIdx,
9662 AMDGPUOperand::ImmTyMatrixBScale, 0);
9663 }
9664
9665 int MatrixAScaleFmtIdx =
9666 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9667 if (MatrixAScaleFmtIdx != -1) {
9668 addOptionalImmOperand(Inst, Operands, OptIdx,
9669 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9670 }
9671
9672 int MatrixBScaleFmtIdx =
9673 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9674 if (MatrixBScaleFmtIdx != -1) {
9675 addOptionalImmOperand(Inst, Operands, OptIdx,
9676 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9677 }
9678
9679 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9680 addOptionalImmOperand(Inst, Operands, OptIdx,
9681 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9682
9683 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9684 addOptionalImmOperand(Inst, Operands, OptIdx,
9685 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9686
9687 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9688 if (NegLoIdx != -1)
9689 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9690
9691 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9692 if (NegHiIdx != -1)
9693 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9694
9695 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9696 AMDGPU::OpName::src2};
9697 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9698 AMDGPU::OpName::src1_modifiers,
9699 AMDGPU::OpName::src2_modifiers};
9700
9701 unsigned OpSel = 0;
9702 unsigned OpSelHi = 0;
9703 unsigned NegLo = 0;
9704 unsigned NegHi = 0;
9705
9706 if (OpSelIdx != -1)
9707 OpSel = Inst.getOperand(OpSelIdx).getImm();
9708
9709 if (OpSelHiIdx != -1)
9710 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9711
9712 if (NegLoIdx != -1)
9713 NegLo = Inst.getOperand(NegLoIdx).getImm();
9714
9715 if (NegHiIdx != -1)
9716 NegHi = Inst.getOperand(NegHiIdx).getImm();
9717
9718 for (int J = 0; J < 3; ++J) {
9719 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9720 if (OpIdx == -1)
9721 break;
9722
9723 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9724
9725 if (ModIdx == -1)
9726 continue;
9727
9728 uint32_t ModVal = 0;
9729
9730 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9731 if (SrcOp.isReg() && getMRI()
9732 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9733 .contains(SrcOp.getReg())) {
9734 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9735 if (VGPRSuffixIsHi)
9736 ModVal |= SISrcMods::OP_SEL_0;
9737 } else {
9738 if ((OpSel & (1 << J)) != 0)
9739 ModVal |= SISrcMods::OP_SEL_0;
9740 }
9741
9742 if ((OpSelHi & (1 << J)) != 0)
9743 ModVal |= SISrcMods::OP_SEL_1;
9744
9745 if ((NegLo & (1 << J)) != 0)
9746 ModVal |= SISrcMods::NEG;
9747
9748 if ((NegHi & (1 << J)) != 0)
9749 ModVal |= SISrcMods::NEG_HI;
9750
9751 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9752 }
9753}
9754
9755void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9756 OptionalImmIndexMap OptIdx;
9757 cvtVOP3(Inst, Operands, OptIdx);
9758 cvtVOP3P(Inst, Operands, OptIdx);
9759}
9760
 9761static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
 9762 unsigned i, unsigned Opc,
9763 AMDGPU::OpName OpName) {
9764 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9765 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9766 else
9767 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9768}
9769
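// SWMMAC MCInst operand order: vdst, src0 (with modifiers where present),
// src1 (with modifiers where present), a copy of vdst as the tied input,
// src2, then the optional modifiers gathered below.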
9770void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9771 unsigned Opc = Inst.getOpcode();
9772
9773 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9774 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9775 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9776 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9777 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9778
9779 OptionalImmIndexMap OptIdx;
9780 for (unsigned i = 5; i < Operands.size(); ++i) {
9781 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9782 OptIdx[Op.getImmTy()] = i;
9783 }
9784
9785 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9786 addOptionalImmOperand(Inst, Operands, OptIdx,
9787 AMDGPUOperand::ImmTyIndexKey8bit);
9788
9789 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9790 addOptionalImmOperand(Inst, Operands, OptIdx,
9791 AMDGPUOperand::ImmTyIndexKey16bit);
9792
9793 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9794 addOptionalImmOperand(Inst, Operands, OptIdx,
9795 AMDGPUOperand::ImmTyIndexKey32bit);
9796
9797 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9798 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9799
9800 cvtVOP3P(Inst, Operands, OptIdx);
9801}
9802
9803//===----------------------------------------------------------------------===//
9804// VOPD
9805//===----------------------------------------------------------------------===//
9806
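// A VOPD instruction is written as two components separated by "::", for
// example "v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4". This parser
// consumes the "::" token and the VOPDY mnemonic that follows it.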
9807ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9808 if (!hasVOPD(getSTI()))
9809 return ParseStatus::NoMatch;
9810
9811 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9812 SMLoc S = getLoc();
9813 lex();
9814 lex();
9815 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9816 SMLoc OpYLoc = getLoc();
9817 StringRef OpYName;
9818 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9819 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9820 return ParseStatus::Success;
9821 }
9822 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9823 }
9824 return ParseStatus::NoMatch;
9825}
9826
9827// Create VOPD MCInst operands using parsed assembler operands.
9828void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9829 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9830
9831 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9832 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
 9833 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
 9834 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9835 return;
9836 }
9837 if (Op.isReg()) {
9838 Op.addRegOperands(Inst, 1);
9839 return;
9840 }
9841 if (Op.isImm()) {
9842 Op.addImmOperands(Inst, 1);
9843 return;
9844 }
9845 llvm_unreachable("Unhandled operand type in cvtVOPD");
9846 };
9847
9848 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9849
9850 // MCInst operands are ordered as follows:
9851 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9852
9853 for (auto CompIdx : VOPD::COMPONENTS) {
9854 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9855 }
9856
9857 for (auto CompIdx : VOPD::COMPONENTS) {
9858 const auto &CInfo = InstInfo[CompIdx];
9859 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9860 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9861 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9862 if (CInfo.hasSrc2Acc())
9863 addOp(CInfo.getIndexOfDstInParsedOperands());
9864 }
9865
9866 int BitOp3Idx =
9867 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9868 if (BitOp3Idx != -1) {
9869 OptionalImmIndexMap OptIdx;
9870 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9871 if (Op.isImm())
9872 OptIdx[Op.getImmTy()] = Operands.size() - 1;
9873
9874 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9875 }
9876}
9877
9878//===----------------------------------------------------------------------===//
9879// dpp
9880//===----------------------------------------------------------------------===//
9881
9882bool AMDGPUOperand::isDPP8() const {
9883 return isImmTy(ImmTyDPP8);
9884}
9885
9886bool AMDGPUOperand::isDPPCtrl() const {
9887 using namespace AMDGPU::DPP;
9888
9889 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9890 if (result) {
9891 int64_t Imm = getImm();
9892 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9893 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9894 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9895 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9896 (Imm == DppCtrl::WAVE_SHL1) ||
9897 (Imm == DppCtrl::WAVE_ROL1) ||
9898 (Imm == DppCtrl::WAVE_SHR1) ||
9899 (Imm == DppCtrl::WAVE_ROR1) ||
9900 (Imm == DppCtrl::ROW_MIRROR) ||
9901 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9902 (Imm == DppCtrl::BCAST15) ||
9903 (Imm == DppCtrl::BCAST31) ||
9904 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9905 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9906 }
9907 return false;
9908}
9909
9910//===----------------------------------------------------------------------===//
9911// mAI
9912//===----------------------------------------------------------------------===//
9913
9914bool AMDGPUOperand::isBLGP() const {
9915 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9916}
9917
9918bool AMDGPUOperand::isS16Imm() const {
9919 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9920}
9921
9922bool AMDGPUOperand::isU16Imm() const {
9923 return isImmLiteral() && isUInt<16>(getImm());
9924}
9925
9926//===----------------------------------------------------------------------===//
9927// dim
9928//===----------------------------------------------------------------------===//
9929
9930bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9931 // We want to allow "dim:1D" etc.,
9932 // but the initial 1 is tokenized as an integer.
9933 std::string Token;
9934 if (isToken(AsmToken::Integer)) {
9935 SMLoc Loc = getToken().getEndLoc();
9936 Token = std::string(getTokenStr());
9937 lex();
9938 if (getLoc() != Loc)
9939 return false;
9940 }
9941
9942 StringRef Suffix;
9943 if (!parseId(Suffix))
9944 return false;
9945 Token += Suffix;
9946
9947 StringRef DimId = Token;
9948 DimId.consume_front("SQ_RSRC_IMG_");
9949
9950 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9951 if (!DimInfo)
9952 return false;
9953
9954 Encoding = DimInfo->Encoding;
9955 return true;
9956}
9957
9958ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9959 if (!isGFX10Plus())
9960 return ParseStatus::NoMatch;
9961
9962 SMLoc S = getLoc();
9963
9964 if (!trySkipId("dim", AsmToken::Colon))
9965 return ParseStatus::NoMatch;
9966
9967 unsigned Encoding;
9968 SMLoc Loc = getLoc();
9969 if (!parseDimId(Encoding))
9970 return Error(Loc, "invalid dim value");
9971
9972 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9973 AMDGPUOperand::ImmTyDim));
9974 return ParseStatus::Success;
9975}
9976
9977//===----------------------------------------------------------------------===//
9978// dpp
9979//===----------------------------------------------------------------------===//
9980
9981ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9982 SMLoc S = getLoc();
9983
9984 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9985 return ParseStatus::NoMatch;
9986
9987 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9988
9989 int64_t Sels[8];
9990
9991 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9992 return ParseStatus::Failure;
9993
9994 for (size_t i = 0; i < 8; ++i) {
9995 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9996 return ParseStatus::Failure;
9997
9998 SMLoc Loc = getLoc();
9999 if (getParser().parseAbsoluteExpression(Sels[i]))
10000 return ParseStatus::Failure;
10001 if (0 > Sels[i] || 7 < Sels[i])
10002 return Error(Loc, "expected a 3-bit value");
10003 }
10004
10005 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10006 return ParseStatus::Failure;
10007
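// Pack the eight 3-bit lane selectors into a single immediate, lane 0 in the
// lowest bits.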
10008 unsigned DPP8 = 0;
10009 for (size_t i = 0; i < 8; ++i)
10010 DPP8 |= (Sels[i] << (i * 3));
10011
10012 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
10013 return ParseStatus::Success;
10014}
10015
10016bool
10017AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10018 const OperandVector &Operands) {
10019 if (Ctrl == "row_newbcast")
10020 return isGFX90A();
10021
10022 if (Ctrl == "row_share" ||
10023 Ctrl == "row_xmask")
10024 return isGFX10Plus();
10025
10026 if (Ctrl == "wave_shl" ||
10027 Ctrl == "wave_shr" ||
10028 Ctrl == "wave_rol" ||
10029 Ctrl == "wave_ror" ||
10030 Ctrl == "row_bcast")
10031 return isVI() || isGFX9();
10032
10033 return Ctrl == "row_mirror" ||
10034 Ctrl == "row_half_mirror" ||
10035 Ctrl == "quad_perm" ||
10036 Ctrl == "row_shl" ||
10037 Ctrl == "row_shr" ||
10038 Ctrl == "row_ror";
10039}
10040
10041int64_t
10042AMDGPUAsmParser::parseDPPCtrlPerm() {
10043 // quad_perm:[%d,%d,%d,%d]
10044
10045 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10046 return -1;
10047
10048 int64_t Val = 0;
10049 for (int i = 0; i < 4; ++i) {
10050 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10051 return -1;
10052
10053 int64_t Temp;
10054 SMLoc Loc = getLoc();
10055 if (getParser().parseAbsoluteExpression(Temp))
10056 return -1;
10057 if (Temp < 0 || Temp > 3) {
10058 Error(Loc, "expected a 2-bit value");
10059 return -1;
10060 }
10061
10062 Val += (Temp << i * 2);
10063 }
10064
10065 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10066 return -1;
10067
10068 return Val;
10069}
10070
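// For controls that take a value (e.g. row_shl:1), Check.Ctrl below is the
// base encoding and [Lo, Hi] the accepted value range; when Lo == Hi the
// value merely selects that single fixed encoding (the wave_* controls).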
10071int64_t
10072AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10073 using namespace AMDGPU::DPP;
10074
10075 // sel:%d
10076
10077 int64_t Val;
10078 SMLoc Loc = getLoc();
10079
10080 if (getParser().parseAbsoluteExpression(Val))
10081 return -1;
10082
10083 struct DppCtrlCheck {
10084 int64_t Ctrl;
10085 int Lo;
10086 int Hi;
10087 };
10088
10089 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10090 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10091 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10092 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10093 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10094 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10095 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10096 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10097 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10098 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10099 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10100 .Default({-1, 0, 0});
10101
10102 bool Valid;
10103 if (Check.Ctrl == -1) {
10104 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10105 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10106 } else {
10107 Valid = Check.Lo <= Val && Val <= Check.Hi;
10108 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10109 }
10110
10111 if (!Valid) {
10112 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10113 return -1;
10114 }
10115
10116 return Val;
10117}
10118
10119ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10120 using namespace AMDGPU::DPP;
10121
10122 if (!isToken(AsmToken::Identifier) ||
10123 !isSupportedDPPCtrl(getTokenStr(), Operands))
10124 return ParseStatus::NoMatch;
10125
10126 SMLoc S = getLoc();
10127 int64_t Val = -1;
10128 StringRef Ctrl;
10129
10130 parseId(Ctrl);
10131
10132 if (Ctrl == "row_mirror") {
10133 Val = DppCtrl::ROW_MIRROR;
10134 } else if (Ctrl == "row_half_mirror") {
10135 Val = DppCtrl::ROW_HALF_MIRROR;
10136 } else {
10137 if (skipToken(AsmToken::Colon, "expected a colon")) {
10138 if (Ctrl == "quad_perm") {
10139 Val = parseDPPCtrlPerm();
10140 } else {
10141 Val = parseDPPCtrlSel(Ctrl);
10142 }
10143 }
10144 }
10145
10146 if (Val == -1)
10147 return ParseStatus::Failure;
10148
10149 Operands.push_back(
10150 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10151 return ParseStatus::Success;
10152}
10153
10154void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10155 bool IsDPP8) {
10156 OptionalImmIndexMap OptionalIdx;
10157 unsigned Opc = Inst.getOpcode();
10158 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10159
 10160 // MAC instructions are special because they have an 'old'
 10161 // operand which is not tied to dst (but is assumed to be).
 10162 // They also have a dummy, unused src2_modifiers operand.
10163 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10164 int Src2ModIdx =
10165 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10166 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10167 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10168
10169 unsigned I = 1;
10170 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10171 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10172 }
10173
10174 int Fi = 0;
10175 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10176 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10177 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10178 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10179 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
10180
10181 for (unsigned E = Operands.size(); I != E; ++I) {
10182
10183 if (IsMAC) {
10184 int NumOperands = Inst.getNumOperands();
10185 if (OldIdx == NumOperands) {
10186 // Handle old operand
10187 constexpr int DST_IDX = 0;
10188 Inst.addOperand(Inst.getOperand(DST_IDX));
10189 } else if (Src2ModIdx == NumOperands) {
10190 // Add unused dummy src2_modifiers
 10191 Inst.addOperand(MCOperand::createImm(0));
 10192 }
10193 }
10194
10195 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10196 Inst.addOperand(Inst.getOperand(0));
10197 }
10198
10199 if (IsVOP3CvtSrDpp) {
10200 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
 10201 Inst.addOperand(MCOperand::createImm(0));
 10202 Inst.addOperand(MCOperand::createReg(MCRegister()));
10203 }
10204 }
10205
10206 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
 10207 MCOI::TIED_TO);
 10208 if (TiedTo != -1) {
10209 assert((unsigned)TiedTo < Inst.getNumOperands());
10210 // handle tied old or src2 for MAC instructions
10211 Inst.addOperand(Inst.getOperand(TiedTo));
10212 }
10213 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10214 // Add the register arguments
10215 if (IsDPP8 && Op.isDppFI()) {
10216 Fi = Op.getImm();
10217 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10218 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10219 } else if (Op.isReg()) {
10220 Op.addRegOperands(Inst, 1);
10221 } else if (Op.isImm() &&
10222 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10223 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
10224 Op.addImmOperands(Inst, 1);
10225 } else if (Op.isImm()) {
10226 OptionalIdx[Op.getImmTy()] = I;
10227 } else {
10228 llvm_unreachable("unhandled operand type");
10229 }
10230 }
10231
10232 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10233 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10234 AMDGPUOperand::ImmTyClamp);
10235
10236 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10237 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10238 Inst.addOperand(Inst.getOperand(0));
10239 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10240 AMDGPUOperand::ImmTyByteSel);
10241 }
10242
10243 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10244 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10245
10246 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10247 cvtVOP3P(Inst, Operands, OptionalIdx);
10248 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10249 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10250 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10251 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10252 }
10253
10254 if (IsDPP8) {
10255 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10256 using namespace llvm::AMDGPU::DPP;
10257 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10258 } else {
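// 0xe4 encodes quad_perm:[0,1,2,3], i.e. the identity DPP control.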
10259 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10260 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10261 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10262 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10263
10264 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10265 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10266 AMDGPUOperand::ImmTyDppFI);
10267 }
10268}
10269
10270void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10271 OptionalImmIndexMap OptionalIdx;
10272
10273 unsigned I = 1;
10274 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10275 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10276 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10277 }
10278
10279 int Fi = 0;
10280 for (unsigned E = Operands.size(); I != E; ++I) {
10281 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
 10282 MCOI::TIED_TO);
 10283 if (TiedTo != -1) {
10284 assert((unsigned)TiedTo < Inst.getNumOperands());
10285 // handle tied old or src2 for MAC instructions
10286 Inst.addOperand(Inst.getOperand(TiedTo));
10287 }
10288 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10289 // Add the register arguments
10290 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10291 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10292 // Skip it.
10293 continue;
10294 }
10295
10296 if (IsDPP8) {
10297 if (Op.isDPP8()) {
10298 Op.addImmOperands(Inst, 1);
10299 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10300 Op.addRegWithFPInputModsOperands(Inst, 2);
10301 } else if (Op.isDppFI()) {
10302 Fi = Op.getImm();
10303 } else if (Op.isReg()) {
10304 Op.addRegOperands(Inst, 1);
10305 } else {
10306 llvm_unreachable("Invalid operand type");
10307 }
10308 } else {
 10309 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
 10310 Op.addRegWithFPInputModsOperands(Inst, 2);
10311 } else if (Op.isReg()) {
10312 Op.addRegOperands(Inst, 1);
10313 } else if (Op.isDPPCtrl()) {
10314 Op.addImmOperands(Inst, 1);
10315 } else if (Op.isImm()) {
10316 // Handle optional arguments
10317 OptionalIdx[Op.getImmTy()] = I;
10318 } else {
10319 llvm_unreachable("Invalid operand type");
10320 }
10321 }
10322 }
10323
10324 if (IsDPP8) {
10325 using namespace llvm::AMDGPU::DPP;
10326 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10327 } else {
10328 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10329 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10330 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10331 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10332 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10333 AMDGPUOperand::ImmTyDppFI);
10334 }
10335 }
10336}
10337
10338//===----------------------------------------------------------------------===//
10339// sdwa
10340//===----------------------------------------------------------------------===//
10341
10342ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10343 StringRef Prefix,
10344 AMDGPUOperand::ImmTy Type) {
10345 return parseStringOrIntWithPrefix(
10346 Operands, Prefix,
10347 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10348 Type);
10349}
10350
10351ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10352 return parseStringOrIntWithPrefix(
10353 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10354 AMDGPUOperand::ImmTySDWADstUnused);
10355}
10356
10357void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10358 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
10359}
10360
10361void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10362 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
10363}
10364
10365void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10366 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
10367}
10368
10369void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10370 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
10371}
10372
10373void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10374 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
10375}
10376
10377void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10378 uint64_t BasicInstType,
10379 bool SkipDstVcc,
10380 bool SkipSrcVcc) {
10381 using namespace llvm::AMDGPU::SDWA;
10382
10383 OptionalImmIndexMap OptionalIdx;
10384 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10385 bool SkippedVcc = false;
10386
10387 unsigned I = 1;
10388 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10389 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10390 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10391 }
10392
10393 for (unsigned E = Operands.size(); I != E; ++I) {
10394 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10395 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10396 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10397 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10398 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10399 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10400 // Skip VCC only if we didn't skip it on previous iteration.
10401 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10402 if (BasicInstType == SIInstrFlags::VOP2 &&
10403 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10404 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10405 SkippedVcc = true;
10406 continue;
10407 }
10408 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10409 SkippedVcc = true;
10410 continue;
10411 }
10412 }
 10413 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
 10414 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10415 } else if (Op.isImm()) {
10416 // Handle optional arguments
10417 OptionalIdx[Op.getImmTy()] = I;
10418 } else {
10419 llvm_unreachable("Invalid operand type");
10420 }
10421 SkippedVcc = false;
10422 }
10423
10424 const unsigned Opc = Inst.getOpcode();
10425 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10426 Opc != AMDGPU::V_NOP_sdwa_vi) {
 10427 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
10428 switch (BasicInstType) {
10429 case SIInstrFlags::VOP1:
10430 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10431 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10432 AMDGPUOperand::ImmTyClamp, 0);
10433
10434 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10435 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10436 AMDGPUOperand::ImmTyOModSI, 0);
10437
10438 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10439 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10440 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10441
10442 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10443 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10444 AMDGPUOperand::ImmTySDWADstUnused,
10445 DstUnused::UNUSED_PRESERVE);
10446
10447 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10448 break;
10449
10450 case SIInstrFlags::VOP2:
10451 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10452 AMDGPUOperand::ImmTyClamp, 0);
10453
10454 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10455 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10456
10457 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10458 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10459 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10460 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10461 break;
10462
10463 case SIInstrFlags::VOPC:
10464 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10465 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10466 AMDGPUOperand::ImmTyClamp, 0);
10467 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10468 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10469 break;
10470
10471 default:
10472 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10473 }
10474 }
10475
10476 // special case v_mac_{f16, f32}:
10477 // it has src2 register operand that is tied to dst operand
10478 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10479 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10480 auto *it = Inst.begin();
10481 std::advance(
10482 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10483 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10484 }
10485}
10486
10487/// Force static initialization.
10488extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
 10489LLVMInitializeAMDGPUAsmParser() {
 10490 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
 10491 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
 10492}
 10493
10494#define GET_REGISTER_MATCHER
10495#define GET_MATCHER_IMPLEMENTATION
10496#define GET_MNEMONIC_SPELL_CHECKER
10497#define GET_MNEMONIC_CHECKER
10498#include "AMDGPUGenAsmMatcher.inc"
10499
10500ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10501 unsigned MCK) {
10502 switch (MCK) {
10503 case MCK_addr64:
10504 return parseTokenOp("addr64", Operands);
10505 case MCK_done:
10506 return parseTokenOp("done", Operands);
10507 case MCK_idxen:
10508 return parseTokenOp("idxen", Operands);
10509 case MCK_lds:
10510 return parseTokenOp("lds", Operands);
10511 case MCK_offen:
10512 return parseTokenOp("offen", Operands);
10513 case MCK_off:
10514 return parseTokenOp("off", Operands);
10515 case MCK_row_95_en:
10516 return parseTokenOp("row_en", Operands);
10517 case MCK_gds:
10518 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10519 case MCK_tfe:
10520 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10521 }
10522 return tryCustomParseOperand(Operands, MCK);
10523}
10524
10525// This function should be defined after auto-generated include so that we have
10526// MatchClassKind enum defined
10527unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10528 unsigned Kind) {
10529 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10530 // But MatchInstructionImpl() expects to meet token and fails to validate
10531 // operand. This method checks if we are given immediate operand but expect to
10532 // get corresponding token.
10533 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10534 switch (Kind) {
10535 case MCK_addr64:
10536 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10537 case MCK_gds:
10538 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10539 case MCK_lds:
10540 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10541 case MCK_idxen:
10542 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10543 case MCK_offen:
10544 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10545 case MCK_tfe:
10546 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10547 case MCK_SSrc_b32:
10548 // When operands have expression values, they will return true for isToken,
10549 // because it is not possible to distinguish between a token and an
10550 // expression at parse time. MatchInstructionImpl() will always try to
10551 // match an operand as a token, when isToken returns true, and when the
10552 // name of the expression is not a valid token, the match will fail,
10553 // so we need to handle it here.
10554 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10555 case MCK_SSrc_f32:
10556 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10557 case MCK_SOPPBrTarget:
10558 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10559 case MCK_VReg32OrOff:
10560 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10561 case MCK_InterpSlot:
10562 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10563 case MCK_InterpAttr:
10564 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10565 case MCK_InterpAttrChan:
10566 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10567 case MCK_SReg_64:
10568 case MCK_SReg_64_XEXEC:
10569 // Null is defined as a 32-bit register but
10570 // it should also be enabled with 64-bit operands or larger.
10571 // The following code enables it for SReg_64 and larger operands
10572 // used as source and destination. Remaining source
10573 // operands are handled in isInlinableImm.
10574 case MCK_SReg_96:
10575 case MCK_SReg_128:
10576 case MCK_SReg_256:
10577 case MCK_SReg_512:
10578 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10579 default:
10580 return Match_InvalidOperand;
10581 }
10582}
10583
10584//===----------------------------------------------------------------------===//
10585// endpgm
10586//===----------------------------------------------------------------------===//
10587
10588ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10589 SMLoc S = getLoc();
10590 int64_t Imm = 0;
10591
10592 if (!parseExpr(Imm)) {
10593 // The operand is optional, if not present default to 0
10594 Imm = 0;
10595 }
10596
10597 if (!isUInt<16>(Imm))
10598 return Error(S, "expected a 16-bit value");
10599
10600 Operands.push_back(
10601 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10602 return ParseStatus::Success;
10603}
10604
10605bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10606
10607//===----------------------------------------------------------------------===//
10608// Split Barrier
10609//===----------------------------------------------------------------------===//
10610
10611bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
#define P(N)
if(PassOpts->AAPipeline)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
unsigned unsigned DefaultVal
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file implements the SmallBitVector class.
static bool Enabled
Definition Statistic.cpp:46
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
BinaryOperator * Mul
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6057
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
Target independent representation for an assembler token.
Definition MCAsmMacro.h:22
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:64
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
constexpr bool isValid() const
Definition MCRegister.h:76
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:95
Represents a location in source code.
Definition SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:36
constexpr const char * getPointer() const
Definition SMLoc.h:34
constexpr bool isValid() const
Definition SMLoc.h:29
Represents a range in source code.
Definition SMLoc.h:48
SMLoc Start
Definition SMLoc.h:50
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:862
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:665
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:581
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:151
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:619
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:148
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:645
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:281
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:231
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:202
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:209
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:219
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:224
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:206
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:201
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:218
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:216
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:205
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:236
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:237
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:212
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:204
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:217
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:213
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:238
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:220
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:203
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:228
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1422
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:61
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:578
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1113
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:307
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:217
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:252
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:267
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:295
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:159
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:164
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:399
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:155
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1760
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
RegisterKind Kind
StringLiteral Name
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
static LLVM_ABI const fltSemantics & IEEEhalf() LLVM_READNONE
Definition APFloat.cpp:264
static LLVM_ABI const fltSemantics & BFloat() LLVM_READNONE
Definition APFloat.cpp:265
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...