1//===----------------------- SIFrameLowering.cpp --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8
9#include "SIFrameLowering.h"
10#include "AMDGPU.h"
11#include "GCNSubtarget.h"
18
19using namespace llvm;
20
21#define DEBUG_TYPE "frame-info"
22
24 "amdgpu-spill-vgpr-to-agpr",
25 cl::desc("Enable spilling VGPRs to AGPRs"),
27 cl::init(true));
28
29// Find a register matching \p RC from \p LiveUnits which is unused and
30// available throughout the function. On failure, returns AMDGPU::NoRegister.
31// TODO: Rewrite the loop here to iterate over MCRegUnits instead of
32// MCRegisters. This should reduce the number of iterations and avoid redundant
33// checking.
34static MCRegister findUnusedRegister(const MachineRegisterInfo &MRI,
35                                     const LiveRegUnits &LiveUnits,
36 const TargetRegisterClass &RC) {
37 for (MCRegister Reg : RC) {
38 if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
39 !MRI.isReserved(Reg))
40 return Reg;
41 }
42 return MCRegister();
43}
44
45// Find a scratch register that we can use in the prologue. We avoid using
46// callee-save registers since they may appear to be free when this is called
47// from canUseAsPrologue (during shrink wrapping), but then no longer be free
48// when this is called from emitPrologue.
49static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
50                                                    LiveRegUnits &LiveUnits,
51                                                    const TargetRegisterClass &RC, bool Unused = false) {
52 // Mark callee saved registers as used so we will not choose them.
53 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
54 for (unsigned i = 0; CSRegs[i]; ++i)
55 LiveUnits.addReg(CSRegs[i]);
56
57 // We are looking for a register that can be used throughout the entire
58 // function, so any use is unacceptable.
59 if (Unused)
60 return findUnusedRegister(MRI, LiveUnits, RC);
61
62 for (MCRegister Reg : RC) {
63 if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
64 return Reg;
65 }
66
67 return MCRegister();
68}
69
70/// Query target location for spilling SGPRs
71/// \p IncludeScratchCopy : Also look for free scratch SGPRs
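/// In decreasing order of preference, the register is saved by copying it to an
/// unused scratch SGPR, by spilling it to a free lane of a physical VGPR, or,
/// failing both, by spilling it to a stack slot in memory.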
72static void getVGPRSpillLaneOrTempRegister(
73    MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
74 const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
75 bool IncludeScratchCopy = true) {
77 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
78
79 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
80 const SIRegisterInfo *TRI = ST.getRegisterInfo();
81 unsigned Size = TRI->getSpillSize(RC);
82 Align Alignment = TRI->getSpillAlign(RC);
83
84 // We need to save and restore the given SGPR.
85
86 Register ScratchSGPR;
87 // 1: Try to save the given register into an unused scratch SGPR. The
88 // LiveUnits should have all the callee saved registers marked as used. For
89 // certain cases we skip copy to scratch SGPR.
90 if (IncludeScratchCopy)
91 ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);
92
93 if (!ScratchSGPR) {
94 int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
96
97 if (TRI->spillSGPRToVGPR() &&
98 MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
99 /*IsPrologEpilog=*/true)) {
100      // 2: There was no free scratch SGPR to copy into, so spill the SGPR to a
101      // lane of a physical VGPR instead.
105
106 LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
107 dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
108 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
109 << '\n';);
110 } else {
111      // Remove dead <FI> index
112      FrameInfo.RemoveStackObject(FI);
113 // 3: If all else fails, spill the register to memory.
114 FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
116 SGPR,
118 LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling "
119 << printReg(SGPR, TRI) << '\n');
120 }
121 } else {
125 LiveUnits.addReg(ScratchSGPR);
126 LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
127 << printReg(ScratchSGPR, TRI) << '\n');
128 }
129}
130
131// We need to emit these stack operations specially because they use a
132// different frame register than the one getFrameRegister would return for
133// the rest of the function.
134static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
135 const SIMachineFunctionInfo &FuncInfo,
136 LiveRegUnits &LiveUnits, MachineFunction &MF,
139 Register SpillReg, int FI, Register FrameReg,
140 int64_t DwordOff = 0) {
141 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
142 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
143
144 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
147 PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
148 FrameInfo.getObjectAlign(FI));
149 LiveUnits.addReg(SpillReg);
150 bool IsKill = !MBB.isLiveIn(SpillReg);
151 TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
152 DwordOff, MMO, nullptr, &LiveUnits);
153 if (IsKill)
154 LiveUnits.removeReg(SpillReg);
155}
156
157static void buildEpilogRestore(const GCNSubtarget &ST,
158 const SIRegisterInfo &TRI,
159 const SIMachineFunctionInfo &FuncInfo,
160 LiveRegUnits &LiveUnits, MachineFunction &MF,
163 const DebugLoc &DL, Register SpillReg, int FI,
164 Register FrameReg, int64_t DwordOff = 0) {
165 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
166 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
167
168 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
171 PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
172 FrameInfo.getObjectAlign(FI));
173 TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
174 DwordOff, MMO, nullptr, &LiveUnits);
175}
176
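// Materialize the 64-bit pointer to the global information table (GIT): the low
// half comes from the SGPR provided by the driver, and the high half comes
// either from the amdgpu-git-ptr-high attribute or from the current PC.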
177static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
178                        const DebugLoc &DL, const SIInstrInfo *TII,
179 Register TargetReg) {
180 MachineFunction *MF = MBB.getParent();
182 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
183 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
184 Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
185 Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
186
187 if (MFI->getGITPtrHigh() != 0xffffffff) {
188 BuildMI(MBB, I, DL, SMovB32, TargetHi)
189 .addImm(MFI->getGITPtrHigh())
190 .addReg(TargetReg, RegState::ImplicitDefine);
191 } else {
192 const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo);
193 BuildMI(MBB, I, DL, GetPC64, TargetReg);
194 }
195 Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
196 MF->getRegInfo().addLiveIn(GitPtrLo);
197 MBB.addLiveIn(GitPtrLo);
198 BuildMI(MBB, I, DL, SMovB32, TargetLo)
199 .addReg(GitPtrLo);
200}
201
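// Lazily initialize LiveUnits: for a prologue, seed it with the block live-ins;
// for an epilogue, seed it with the block live-outs and step backwards over the
// instruction at the insertion point.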
202static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
203 const SIMachineFunctionInfo *FuncInfo,
205 MachineBasicBlock::iterator MBBI, bool IsProlog) {
206 if (LiveUnits.empty()) {
207 LiveUnits.init(TRI);
208 if (IsProlog) {
209 LiveUnits.addLiveIns(MBB);
210 } else {
211 // In epilog.
212 LiveUnits.addLiveOuts(MBB);
213 LiveUnits.stepBackward(*MBBI);
214 }
215 }
216}
217
218namespace llvm {
219
220// SpillBuilder to save/restore special SGPR spills like the one needed for FP,
221// BP, etc. These spills are delayed until the current function's frame is
222// finalized. For a given register, the builder uses the
223// PrologEpilogSGPRSaveRestoreInfo to decide the spill method.
227 MachineFunction &MF;
228 const GCNSubtarget &ST;
229 MachineFrameInfo &MFI;
230 SIMachineFunctionInfo *FuncInfo;
231 const SIInstrInfo *TII;
232 const SIRegisterInfo &TRI;
233 Register SuperReg;
235 LiveRegUnits &LiveUnits;
236 const DebugLoc &DL;
237 Register FrameReg;
238 ArrayRef<int16_t> SplitParts;
239 unsigned NumSubRegs;
240 unsigned EltSize = 4;
241
242 void saveToMemory(const int FI) const {
243 MachineRegisterInfo &MRI = MF.getRegInfo();
244 assert(!MFI.isDeadObjectIndex(FI));
245
246 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
247
249 MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
250 if (!TmpVGPR)
251 report_fatal_error("failed to find free scratch register");
252
253 for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
254 Register SubReg = NumSubRegs == 1
255 ? SuperReg
256 : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
257 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
258 .addReg(SubReg);
259
260 buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
261 FI, FrameReg, DwordOff);
262 DwordOff += 4;
263 }
264 }
265
266 void saveToVGPRLane(const int FI) const {
267 assert(!MFI.isDeadObjectIndex(FI));
268
269 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
271 FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
272 assert(Spill.size() == NumSubRegs);
273
274 for (unsigned I = 0; I < NumSubRegs; ++I) {
275 Register SubReg = NumSubRegs == 1
276 ? SuperReg
277 : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
278 BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
279 Spill[I].VGPR)
280 .addReg(SubReg)
281 .addImm(Spill[I].Lane)
282 .addReg(Spill[I].VGPR, RegState::Undef);
283 }
284 }
285
286 void copyToScratchSGPR(Register DstReg) const {
287 BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
288 .addReg(SuperReg)
290 }
291
292 void restoreFromMemory(const int FI) {
293 MachineRegisterInfo &MRI = MF.getRegInfo();
294
295 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
297 MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
298 if (!TmpVGPR)
299 report_fatal_error("failed to find free scratch register");
300
301 for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
302 Register SubReg = NumSubRegs == 1
303 ? SuperReg
304 : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
305
306 buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
307 TmpVGPR, FI, FrameReg, DwordOff);
308 MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass);
309 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
310 .addReg(TmpVGPR, RegState::Kill);
311 DwordOff += 4;
312 }
313 }
314
315 void restoreFromVGPRLane(const int FI) {
316 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
318 FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
319 assert(Spill.size() == NumSubRegs);
320
321 for (unsigned I = 0; I < NumSubRegs; ++I) {
322 Register SubReg = NumSubRegs == 1
323 ? SuperReg
324 : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
325 BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
326 .addReg(Spill[I].VGPR)
327 .addImm(Spill[I].Lane);
328 }
329 }
330
331 void copyFromScratchSGPR(Register SrcReg) const {
332 BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
333 .addReg(SrcReg)
335 }
336
337public:
342 const DebugLoc &DL, const SIInstrInfo *TII,
343 const SIRegisterInfo &TRI,
344 LiveRegUnits &LiveUnits, Register FrameReg)
345 : MI(MI), MBB(MBB), MF(*MBB.getParent()),
346 ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
347 FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
348 SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
349 FrameReg(FrameReg) {
350 const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
351 SplitParts = TRI.getRegSplitParts(RC, EltSize);
352 NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
353
354 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
355 }
356
357 void save() {
358 switch (SI.getKind()) {
360 return saveToMemory(SI.getIndex());
362 return saveToVGPRLane(SI.getIndex());
364 return copyToScratchSGPR(SI.getReg());
365 }
366 }
367
368 void restore() {
369 switch (SI.getKind()) {
371 return restoreFromMemory(SI.getIndex());
373 return restoreFromVGPRLane(SI.getIndex());
375 return copyFromScratchSGPR(SI.getReg());
376 }
377 }
378};
379
380} // namespace llvm
381
382// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
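// On subtargets where flat scratch is a pointer (GFX9+), the base is the
// ABI-provided flat scratch init value plus this wave's scratch offset and is
// written to FLAT_SCR (directly, or via S_SETREG on GFX10+); older targets
// instead program the scratch size and a 256-byte-unit offset.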
383void SIFrameLowering::emitEntryFunctionFlatScratchInit(
385 const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
386 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
387 const SIInstrInfo *TII = ST.getInstrInfo();
388 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
389 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
390
391 // We don't need this if we only have spills since there is no user facing
392 // scratch.
393
394 // TODO: If we know we don't have flat instructions earlier, we can omit
395 // this from the input registers.
396 //
397 // TODO: We only need to know if we access scratch space through a flat
398 // pointer. Because we only detect if flat instructions are used at all,
399 // this will be used more often than necessary on VI.
400
401 Register FlatScrInitLo;
402 Register FlatScrInitHi;
403
404 if (ST.isAmdPalOS()) {
405 // Extract the scratch offset from the descriptor in the GIT
406 LiveRegUnits LiveUnits;
407 LiveUnits.init(*TRI);
408 LiveUnits.addLiveIns(MBB);
409
410 // Find unused reg to load flat scratch init into
411 MachineRegisterInfo &MRI = MF.getRegInfo();
412 Register FlatScrInit = AMDGPU::NoRegister;
413 ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
414 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
415 AllSGPR64s = AllSGPR64s.slice(
416 std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
417 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
418 for (MCPhysReg Reg : AllSGPR64s) {
419 if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
420 MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
421 FlatScrInit = Reg;
422 break;
423 }
424 }
425 assert(FlatScrInit && "Failed to find free register for scratch init");
426
427 FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
428 FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
429
430 buildGitPtr(MBB, I, DL, TII, FlatScrInit);
431
432 // We now have the GIT ptr - now get the scratch descriptor from the entry
433 // at offset 0 (or offset 16 for a compute shader).
434 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
435 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
436 auto *MMO = MF.getMachineMemOperand(
437 PtrInfo,
440 8, Align(4));
441 unsigned Offset =
443 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
444 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
445 BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
446 .addReg(FlatScrInit)
447 .addImm(EncodedOffset) // offset
448 .addImm(0) // cpol
449 .addMemOperand(MMO);
450
451 // Mask the offset in [47:0] of the descriptor
452 const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
453 auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
454 .addReg(FlatScrInitHi)
455 .addImm(0xffff);
456 And->getOperand(3).setIsDead(); // Mark SCC as dead.
457 } else {
458 Register FlatScratchInitReg =
460 assert(FlatScratchInitReg);
461
462 MachineRegisterInfo &MRI = MF.getRegInfo();
463 MRI.addLiveIn(FlatScratchInitReg);
464 MBB.addLiveIn(FlatScratchInitReg);
465
466 FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
467 FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
468 }
469
470 // Do a 64-bit pointer add.
471 if (ST.flatScratchIsPointer()) {
472 if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
473 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
474 .addReg(FlatScrInitLo)
475 .addReg(ScratchWaveOffsetReg);
476 auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
477 FlatScrInitHi)
478 .addReg(FlatScrInitHi)
479 .addImm(0);
480 Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
481
482 using namespace AMDGPU::Hwreg;
483 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
484 .addReg(FlatScrInitLo)
485 .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
486 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
487 .addReg(FlatScrInitHi)
488 .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
489 return;
490 }
491
492 // For GFX9.
493 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
494 .addReg(FlatScrInitLo)
495 .addReg(ScratchWaveOffsetReg);
496 auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
497 AMDGPU::FLAT_SCR_HI)
498 .addReg(FlatScrInitHi)
499 .addImm(0);
500 Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
501
502 return;
503 }
504
505 assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
506
507 // Copy the size in bytes.
508 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
509 .addReg(FlatScrInitHi, RegState::Kill);
510
511 // Add wave offset in bytes to private base offset.
512 // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
513 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
514 .addReg(FlatScrInitLo)
515 .addReg(ScratchWaveOffsetReg);
516
517 // Convert offset to 256-byte units.
518 auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
519 AMDGPU::FLAT_SCR_HI)
520 .addReg(FlatScrInitLo, RegState::Kill)
521 .addImm(8);
522 LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
523}
524
525// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
526// memory. They should have been removed by now.
527static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
528  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
529 I != E; ++I) {
530 if (!MFI.isDeadObjectIndex(I))
531 return false;
532 }
533
534 return true;
535}
536
537// Shift down registers reserved for the scratch RSRC.
538Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
539 MachineFunction &MF) const {
540
541 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
542 const SIInstrInfo *TII = ST.getInstrInfo();
543 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
544 MachineRegisterInfo &MRI = MF.getRegInfo();
545 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
546
547 assert(MFI->isEntryFunction());
548
549 Register ScratchRsrcReg = MFI->getScratchRSrcReg();
550
551 if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
553 return Register();
554
555 if (ST.hasSGPRInitBug() ||
556 ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
557 return ScratchRsrcReg;
558
559 // We reserved the last registers for this. Shift it down to the end of those
560 // which were actually used.
561 //
562 // FIXME: It might be safer to use a pseudoregister before replacement.
563
564 // FIXME: We should be able to eliminate unused input registers. We only
565 // cannot do this for the resources required for scratch access. For now we
566 // skip over user SGPRs and may leave unused holes.
567
568 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
569 ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
570 AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
571
572 // Skip the last N reserved elements because they should have already been
573 // reserved for VCC etc.
574 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
575 for (MCPhysReg Reg : AllSGPR128s) {
576 // Pick the first unallocated one. Make sure we don't clobber the other
577 // reserved input we needed. Also for PAL, make sure we don't clobber
578 // the GIT pointer passed in SGPR0 or SGPR8.
579 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
580 (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
581 MRI.replaceRegWith(ScratchRsrcReg, Reg);
583 MRI.reserveReg(Reg, TRI);
584 return Reg;
585 }
586 }
587
588 return ScratchRsrcReg;
589}
590
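// Stack sizes and offsets are tracked in per-lane bytes. With flat scratch the
// SP/FP are also per-lane, so no scaling is needed; with MUBUF scratch they
// index the wave's swizzled scratch, so per-lane sizes are scaled by the
// wavefront size (e.g. a 16-byte frame advances SP by 1024 bytes on wave64).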
591static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
592 return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
593}
594
596 MachineBasicBlock &MBB) const {
597 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
598
599 // FIXME: If we only have SGPR spills, we won't actually be using scratch
600 // memory since these spill to VGPRs. We should be cleaning up these unused
601 // SGPR spill frame indices somewhere.
602
603 // FIXME: We still have implicit uses on SGPR spill instructions in case they
604 // need to spill to vector memory. It's likely that will not happen, but at
605 // this point it appears we need the setup. This part of the prolog should be
606 // emitted after frame indices are eliminated.
607
608 // FIXME: Remove all of the isPhysRegUsed checks
609
611 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
612 const SIInstrInfo *TII = ST.getInstrInfo();
613 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
615 const Function &F = MF.getFunction();
616 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
617
618 assert(MFI->isEntryFunction());
619
620 Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
622
623 // We need to do the replacement of the private segment buffer register even
624 // if there are no stack objects. There could be stores to undef or a
625 // constant without an associated object.
626 //
627 // This will return `Register()` in cases where there are no actual
628 // uses of the SRSRC.
629 Register ScratchRsrcReg;
630 if (!ST.enableFlatScratch())
631 ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
632
633 // Make the selected register live throughout the function.
634 if (ScratchRsrcReg) {
635 for (MachineBasicBlock &OtherBB : MF) {
636 if (&OtherBB != &MBB) {
637 OtherBB.addLiveIn(ScratchRsrcReg);
638 }
639 }
640 }
641
642 // Now that we have fixed the reserved SRSRC we need to locate the
643 // (potentially) preloaded SRSRC.
644 Register PreloadedScratchRsrcReg;
645 if (ST.isAmdHsaOrMesa(F)) {
646 PreloadedScratchRsrcReg =
648 if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
649 // We added live-ins during argument lowering, but since they were not
650 // used they were deleted. We're adding the uses now, so add them back.
651 MRI.addLiveIn(PreloadedScratchRsrcReg);
652 MBB.addLiveIn(PreloadedScratchRsrcReg);
653 }
654 }
655
656 // Debug location must be unknown since the first debug location is used to
657 // determine the end of the prologue.
658 DebugLoc DL;
660
661  // We found the SRSRC first because it needs four registers and has an
662  // alignment requirement. If the SRSRC that we found clobbers the scratch
663  // wave offset, which may be in a fixed SGPR or a free SGPR chosen by
664  // SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset to a
665  // free SGPR.
666 Register ScratchWaveOffsetReg;
667 if (PreloadedScratchWaveOffsetReg &&
668 TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
669 ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
670 unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
671 AllSGPRs = AllSGPRs.slice(
672 std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
673 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
674 for (MCPhysReg Reg : AllSGPRs) {
675 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
676 !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
677 ScratchWaveOffsetReg = Reg;
678 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
679 .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
680 break;
681 }
682 }
683
684 // FIXME: We can spill incoming arguments and restore at the end of the
685 // prolog.
686 if (!ScratchWaveOffsetReg)
688 "could not find temporary scratch offset register in prolog");
689 } else {
690 ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
691 }
692 assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
693
694 unsigned Offset = FrameInfo.getStackSize() * getScratchScaleFactor(ST);
695 if (!mayReserveScratchForCWSR(MF)) {
696 if (hasFP(MF)) {
698 assert(FPReg != AMDGPU::FP_REG);
699 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
700 }
701
704 assert(SPReg != AMDGPU::SP_REG);
705 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
706 }
707 } else {
708 // We need to check if we're on a compute queue - if we are, then the CWSR
709 // trap handler may need to store some VGPRs on the stack. The first VGPR
710 // block is saved separately, so we only need to allocate space for any
711 // additional VGPR blocks used. For now, we will make sure there's enough
712 // room for the theoretical maximum number of VGPRs that can be allocated.
713 // FIXME: Figure out if the shader uses fewer VGPRs in practice.
714 assert(hasFP(MF));
716 assert(FPReg != AMDGPU::FP_REG);
717 unsigned VGPRSize = llvm::alignTo(
718 (ST.getAddressableNumVGPRs(MFI->getDynamicVGPRBlockSize()) -
720 MFI->getDynamicVGPRBlockSize())) *
721 4,
722 FrameInfo.getMaxAlign());
724
725 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_GETREG_B32), FPReg)
728 // The MicroEngine ID is 0 for the graphics queue, and 1 or 2 for compute
729 // (3 is unused, so we ignore it). Unfortunately, S_GETREG doesn't set
730 // SCC, so we need to check for 0 manually.
731 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMP_LG_U32)).addImm(0).addReg(FPReg);
732 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMOVK_I32), FPReg).addImm(VGPRSize);
735 assert(SPReg != AMDGPU::SP_REG);
736
737 // If at least one of the constants can be inlined, then we can use
738 // s_cselect. Otherwise, use a mov and cmovk.
739 if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm()) ||
741 ST.hasInv2PiInlineImm())) {
742 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CSELECT_B32), SPReg)
743 .addImm(Offset + VGPRSize)
744 .addImm(Offset);
745 } else {
746 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
747 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMOVK_I32), SPReg)
748 .addImm(Offset + VGPRSize);
749 }
750 }
751 }
752
753 bool NeedsFlatScratchInit =
755 (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
756 (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
757
758 if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
759 PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
760 MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
761 MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
762 }
763
764 if (NeedsFlatScratchInit) {
765 emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
766 }
767
768 if (ScratchRsrcReg) {
769 emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
770 PreloadedScratchRsrcReg,
771 ScratchRsrcReg, ScratchWaveOffsetReg);
772 }
773}
774
775// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
776void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
778 const DebugLoc &DL, Register PreloadedScratchRsrcReg,
779 Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
780
781 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
782 const SIInstrInfo *TII = ST.getInstrInfo();
783 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
785 const Function &Fn = MF.getFunction();
786
787 if (ST.isAmdPalOS()) {
788 // The pointer to the GIT is formed from the offset passed in and either
789 // the amdgpu-git-ptr-high function attribute or the top part of the PC
790 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
791 Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
792
793 buildGitPtr(MBB, I, DL, TII, Rsrc01);
794
795 // We now have the GIT ptr - now get the scratch descriptor from the entry
796 // at offset 0 (or offset 16 for a compute shader).
798 const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
799 auto *MMO = MF.getMachineMemOperand(
800 PtrInfo,
803 16, Align(4));
804 unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
805 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
806 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
807 BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
808 .addReg(Rsrc01)
809 .addImm(EncodedOffset) // offset
810 .addImm(0) // cpol
811 .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
812 .addMemOperand(MMO);
813
814    // The driver will always set the SRD for wave 64 (bits 118:117 of the
815    // descriptor / bits 22:21 of the third sub-reg will be 0b11).
816    // If the shader is actually wave32, we have to modify the const_index_stride
817    // field of the descriptor's 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
818    // reason the driver does this is that there can be cases where it presents
819    // 2 shaders with different wave sizes (e.g. VsFs).
820 // TODO: convert to using SCRATCH instructions or multiple SRD buffers
821 if (ST.isWave32()) {
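      // The driver hands us bits 22:21 as 0b11 (wave64); clearing bit 21 leaves
      // 0b10, the wave32 (stride=32) encoding.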
822 const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
823 BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
824 .addImm(21)
825 .addReg(Rsrc03);
826 }
827 } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
828 assert(!ST.isAmdHsaOrMesa(Fn));
829 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
830
831 Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
832 Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
833
834 // Use relocations to get the pointer, and setup the other bits manually.
835 uint64_t Rsrc23 = TII->getScratchRsrcWords23();
836
838 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
839
841 const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
842
843 BuildMI(MBB, I, DL, Mov64, Rsrc01)
845 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
846 } else {
847 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
848
849 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
850 auto *MMO = MF.getMachineMemOperand(
851 PtrInfo,
854 8, Align(4));
855 BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
857 .addImm(0) // offset
858 .addImm(0) // cpol
859 .addMemOperand(MMO)
860 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
861
864 }
865 } else {
866 Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
867 Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
868
869 BuildMI(MBB, I, DL, SMovB32, Rsrc0)
870 .addExternalSymbol("SCRATCH_RSRC_DWORD0")
871 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
872
873 BuildMI(MBB, I, DL, SMovB32, Rsrc1)
874 .addExternalSymbol("SCRATCH_RSRC_DWORD1")
875 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
876 }
877
878 BuildMI(MBB, I, DL, SMovB32, Rsrc2)
879 .addImm(Lo_32(Rsrc23))
880 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
881
882 BuildMI(MBB, I, DL, SMovB32, Rsrc3)
883 .addImm(Hi_32(Rsrc23))
884 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
885 } else if (ST.isAmdHsaOrMesa(Fn)) {
886 assert(PreloadedScratchRsrcReg);
887
888 if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
889 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
890 .addReg(PreloadedScratchRsrcReg, RegState::Kill);
891 }
892 }
893
894 // Add the scratch wave offset into the scratch RSRC.
895 //
896 // We only want to update the first 48 bits, which is the base address
897 // pointer, without touching the adjacent 16 bits of flags. We know this add
898 // cannot carry-out from bit 47, otherwise the scratch allocation would be
899 // impossible to fit in the 48-bit global address space.
900 //
901 // TODO: Evaluate if it is better to just construct an SRD using the flat
902 // scratch init and some constants rather than update the one we are passed.
903 Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
904 Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
905
906 // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
907 // the kernel body via inreg arguments.
908 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
909 .addReg(ScratchRsrcSub0)
910 .addReg(ScratchWaveOffsetReg)
911 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
912 auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
913 .addReg(ScratchRsrcSub1)
914 .addImm(0)
915 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
916 Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
917}
918
920 switch (ID) {
924 return true;
927 return false;
928 }
929 llvm_unreachable("Invalid TargetStackID::Value");
930}
931
932// Activate only the inactive lanes when \p EnableInactiveLanes is true.
933// Otherwise, activate all lanes. It returns the saved exec.
935 MachineFunction &MF,
938 const DebugLoc &DL, bool IsProlog,
939 bool EnableInactiveLanes) {
940 Register ScratchExecCopy;
942 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
943 const SIInstrInfo *TII = ST.getInstrInfo();
944 const SIRegisterInfo &TRI = TII->getRegisterInfo();
946
947 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
948
949 if (FuncInfo->isWholeWaveFunction()) {
950 // Whole wave functions already have a copy of the original EXEC mask that
951 // we can use.
952 assert(IsProlog && "Epilog should look at return, not setup");
953 ScratchExecCopy =
954 TII->getWholeWaveFunctionSetup(MF)->getOperand(0).getReg();
955 assert(ScratchExecCopy && "Couldn't find copy of EXEC");
956 } else {
957 ScratchExecCopy = findScratchNonCalleeSaveRegister(
958 MRI, LiveUnits, *TRI.getWaveMaskRegClass());
959 }
960
961 if (!ScratchExecCopy)
962 report_fatal_error("failed to find free scratch register");
963
964 LiveUnits.addReg(ScratchExecCopy);
965
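  // With an all-ones operand, S_OR_SAVEEXEC saves EXEC and then enables every
  // lane, while S_XOR_SAVEEXEC inverts EXEC so that only the previously
  // inactive lanes become active.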
966 const unsigned SaveExecOpc =
967 ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
968 : AMDGPU::S_OR_SAVEEXEC_B32)
969 : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
970 : AMDGPU::S_OR_SAVEEXEC_B64);
971 auto SaveExec =
972 BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1);
973 SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.
974
975 return ScratchExecCopy;
976}
977
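// Emit the prologue stores for callee-saved state: whole-wave-mode VGPRs
// (scratch WWM registers in the inactive lanes, callee-saved WWM VGPRs in all
// lanes), followed by the delayed prolog/epilog SGPR saves such as FP and BP.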
981 Register FrameReg, Register FramePtrRegScratchCopy) const {
983 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
984 const SIInstrInfo *TII = ST.getInstrInfo();
985 const SIRegisterInfo &TRI = TII->getRegisterInfo();
987
988 // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
989 // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
990 // might end up flipping the EXEC bits twice.
991 Register ScratchExecCopy;
992 SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
993 FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
994 if (!WWMScratchRegs.empty())
995 ScratchExecCopy =
996 buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
997 /*IsProlog*/ true, /*EnableInactiveLanes*/ true);
998
999 auto StoreWWMRegisters =
1001 for (const auto &Reg : WWMRegs) {
1002 Register VGPR = Reg.first;
1003 int FI = Reg.second;
1004 buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
1005 VGPR, FI, FrameReg);
1006 }
1007 };
1008
1009 for (const Register Reg : make_first_range(WWMScratchRegs)) {
1010 if (!MRI.isReserved(Reg)) {
1011 MRI.addLiveIn(Reg);
1012 MBB.addLiveIn(Reg);
1013 }
1014 }
1015 StoreWWMRegisters(WWMScratchRegs);
1016
1017 auto EnableAllLanes = [&]() {
1018 unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1019 BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
1020 };
1021
1022 if (!WWMCalleeSavedRegs.empty()) {
1023 if (ScratchExecCopy) {
1024 EnableAllLanes();
1025 } else {
1026 ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1027 /*IsProlog*/ true,
1028 /*EnableInactiveLanes*/ false);
1029 }
1030 }
1031
1032 StoreWWMRegisters(WWMCalleeSavedRegs);
1033 if (FuncInfo->isWholeWaveFunction()) {
1034 // SI_WHOLE_WAVE_FUNC_SETUP has outlived its purpose, so we can remove
1035 // it now. If we have already saved some WWM CSR registers, then the EXEC is
1036 // already -1 and we don't need to do anything else. Otherwise, set EXEC to
1037 // -1 here.
1038 if (!ScratchExecCopy)
1039 buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL, /*IsProlog*/ true,
1040 /*EnableInactiveLanes*/ true);
1041 else if (WWMCalleeSavedRegs.empty())
1042 EnableAllLanes();
1043 TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
1044 } else if (ScratchExecCopy) {
1045 // FIXME: Split block and make terminator.
1046 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1047 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
1048 .addReg(ScratchExecCopy, RegState::Kill);
1049 LiveUnits.addReg(ScratchExecCopy);
1050 }
1051
1052 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1053
1054 for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
1055    // Special handling for the FP spill:
1056    // Skip if FP is saved to a scratch SGPR; that save has already been emitted.
1057    // Otherwise, FP has been moved to a temporary register, so spill that
1058    // register instead.
1059 Register Reg =
1060 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1061 if (!Reg)
1062 continue;
1063
1064 PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
1065 LiveUnits, FrameReg);
1066 SB.save();
1067 }
1068
1069 // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
1070 // such scratch registers live throughout the function.
1071 SmallVector<Register, 1> ScratchSGPRs;
1072 FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
1073 if (!ScratchSGPRs.empty()) {
1074 for (MachineBasicBlock &MBB : MF) {
1075 for (MCPhysReg Reg : ScratchSGPRs)
1076 MBB.addLiveIn(Reg);
1077
1078 MBB.sortUniqueLiveIns();
1079 }
1080 if (!LiveUnits.empty()) {
1081 for (MCPhysReg Reg : ScratchSGPRs)
1082 LiveUnits.addReg(Reg);
1083 }
1084 }
1085}
1086
1090 Register FrameReg, Register FramePtrRegScratchCopy) const {
1091 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1092 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1093 const SIInstrInfo *TII = ST.getInstrInfo();
1094 const SIRegisterInfo &TRI = TII->getRegisterInfo();
1095 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1096
1097 for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
1098    // Special handling for the FP restore:
1099    // Skip if FP needs to be restored from the scratch SGPR. Otherwise, restore
1100    // the FP value into a temporary register. The frame pointer should be
1101    // overwritten only at the end, once all other spills have been restored
1102    // from the current frame.
1103 Register Reg =
1104 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1105 if (!Reg)
1106 continue;
1107
1108 PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
1109 LiveUnits, FrameReg);
1110 SB.restore();
1111 }
1112
1113 // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
1114 // scratch registers. However, restore all lanes of callee-saved VGPRs. Due to
1115 // this, we might end up flipping the EXEC bits twice.
1116 Register ScratchExecCopy;
1117 SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
1118 FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
1119 auto RestoreWWMRegisters =
1121 for (const auto &Reg : WWMRegs) {
1122 Register VGPR = Reg.first;
1123 int FI = Reg.second;
1124 buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
1125 VGPR, FI, FrameReg);
1126 }
1127 };
1128
1129 if (FuncInfo->isWholeWaveFunction()) {
1130 // For whole wave functions, the EXEC is already -1 at this point.
1131 // Therefore, we can restore the CSR WWM registers right away.
1132 RestoreWWMRegisters(WWMCalleeSavedRegs);
1133
1134 // The original EXEC is the first operand of the return instruction.
1135 MachineInstr &Return = MBB.instr_back();
1136 unsigned Opcode = Return.getOpcode();
1137 switch (Opcode) {
1138 case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
1139 Opcode = AMDGPU::SI_RETURN;
1140 break;
1141 case AMDGPU::SI_TCRETURN_GFX_WholeWave:
1142 Opcode = AMDGPU::SI_TCRETURN_GFX;
1143 break;
1144 default:
1145 llvm_unreachable("Unexpected return inst");
1146 }
1147 Register OrigExec = Return.getOperand(0).getReg();
1148
1149 if (!WWMScratchRegs.empty()) {
1150 unsigned XorOpc = ST.isWave32() ? AMDGPU::S_XOR_B32 : AMDGPU::S_XOR_B64;
1151 BuildMI(MBB, MBBI, DL, TII->get(XorOpc), TRI.getExec())
1152 .addReg(OrigExec)
1153 .addImm(-1);
1154 RestoreWWMRegisters(WWMScratchRegs);
1155 }
1156
1157 // Restore original EXEC.
1158 unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1159 BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addReg(OrigExec);
1160
1161 // Drop the first operand and update the opcode.
1162 Return.removeOperand(0);
1163 Return.setDesc(TII->get(Opcode));
1164
1165 return;
1166 }
1167
1168 if (!WWMScratchRegs.empty()) {
1169 ScratchExecCopy =
1170 buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1171 /*IsProlog=*/false, /*EnableInactiveLanes=*/true);
1172 }
1173 RestoreWWMRegisters(WWMScratchRegs);
1174 if (!WWMCalleeSavedRegs.empty()) {
1175 if (ScratchExecCopy) {
1176 unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1177 BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
1178 } else {
1179 ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1180 /*IsProlog*/ false,
1181 /*EnableInactiveLanes*/ false);
1182 }
1183 }
1184
1185 RestoreWWMRegisters(WWMCalleeSavedRegs);
1186 if (ScratchExecCopy) {
1187 // FIXME: Split block and make terminator.
1188 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1189 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
1190 .addReg(ScratchExecCopy, RegState::Kill);
1191 }
1192}
1193
1195 MachineBasicBlock &MBB) const {
1197 if (FuncInfo->isEntryFunction()) {
1199 return;
1200 }
1201
1202 MachineFrameInfo &MFI = MF.getFrameInfo();
1203 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1204 const SIInstrInfo *TII = ST.getInstrInfo();
1205 const SIRegisterInfo &TRI = TII->getRegisterInfo();
1207
1208 Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
1209 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1210 Register BasePtrReg =
1211 TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
1212 LiveRegUnits LiveUnits;
1213
1215 // DebugLoc must be unknown since the first instruction with DebugLoc is used
1216 // to determine the end of the prologue.
1217 DebugLoc DL;
1218
1219 if (FuncInfo->isChainFunction()) {
1220 // Functions with the amdgpu_cs_chain[_preserve] CC don't receive a SP, but
1221 // are free to set one up if they need it.
1222 bool UseSP = requiresStackPointerReference(MF);
1223 if (UseSP) {
1224 assert(StackPtrReg != AMDGPU::SP_REG);
1225
1226 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B32), StackPtrReg)
1228 }
1229 }
1230
1231 bool HasFP = false;
1232 bool HasBP = false;
1233 uint32_t NumBytes = MFI.getStackSize();
1234 uint32_t RoundedSize = NumBytes;
1235
1236 if (TRI.hasStackRealignment(MF))
1237 HasFP = true;
1238
1239 Register FramePtrRegScratchCopy;
1240 if (!HasFP && !hasFP(MF)) {
1241 // Emit the CSR spill stores with SP base register.
1242 emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits,
1243 FuncInfo->isChainFunction() ? Register() : StackPtrReg,
1244 FramePtrRegScratchCopy);
1245 } else {
1246 // CSR spill stores will use FP as base register.
1247 Register SGPRForFPSaveRestoreCopy =
1248 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1249
1250 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
1251 if (SGPRForFPSaveRestoreCopy) {
1252 // Copy FP to the scratch register now and emit the CFI entry. It avoids
1253 // the extra FP copy needed in the other two cases when FP is spilled to
1254 // memory or to a VGPR lane.
1256 FramePtrReg,
1257 FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
1258 DL, TII, TRI, LiveUnits, FramePtrReg);
1259 SB.save();
1260 LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
1261 } else {
1262 // Copy FP into a new scratch register so that its previous value can be
1263 // spilled after setting up the new frame.
1264 FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
1265 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1266 if (!FramePtrRegScratchCopy)
1267 report_fatal_error("failed to find free scratch register");
1268
1269 LiveUnits.addReg(FramePtrRegScratchCopy);
1270 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
1271 .addReg(FramePtrReg);
1272 }
1273 }
1274
1275 if (HasFP) {
1276 const unsigned Alignment = MFI.getMaxAlign().value();
1277
1278 RoundedSize += Alignment;
1279 if (LiveUnits.empty()) {
1280 LiveUnits.init(TRI);
1281 LiveUnits.addLiveIns(MBB);
1282 }
1283
1284 // s_add_i32 s33, s32, NumBytes
1285 // s_and_b32 s33, s33, 0b111...0000
1286 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
1287 .addReg(StackPtrReg)
1288 .addImm((Alignment - 1) * getScratchScaleFactor(ST))
1290 auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
1291 .addReg(FramePtrReg, RegState::Kill)
1292 .addImm(-Alignment * getScratchScaleFactor(ST))
1294 And->getOperand(3).setIsDead(); // Mark SCC as dead.
1295 FuncInfo->setIsStackRealigned(true);
1296 } else if ((HasFP = hasFP(MF))) {
1297 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
1298 .addReg(StackPtrReg)
1300 }
1301
1302 // If FP is used, emit the CSR spills with FP base register.
1303 if (HasFP) {
1304 emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
1305 FramePtrRegScratchCopy);
1306 if (FramePtrRegScratchCopy)
1307 LiveUnits.removeReg(FramePtrRegScratchCopy);
1308 }
1309
1310 // If we need a base pointer, set it up here. It's whatever the value of
1311 // the stack pointer is at this point. Any variable size objects will be
1312 // allocated after this, so we can still use the base pointer to reference
1313 // the incoming arguments.
1314 if ((HasBP = TRI.hasBasePointer(MF))) {
1315 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
1316 .addReg(StackPtrReg)
1318 }
1319
1320 if (HasFP && RoundedSize != 0) {
1321 auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
1322 .addReg(StackPtrReg)
1323 .addImm(RoundedSize * getScratchScaleFactor(ST))
1325 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
1326 }
1327
1328 bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
1329 (void)FPSaved;
1330 assert((!HasFP || FPSaved) &&
1331 "Needed to save FP but didn't save it anywhere");
1332
1333 // If we allow spilling to AGPRs we may have saved FP but then spill
1334 // everything into AGPRs instead of the stack.
1335 assert((HasFP || !FPSaved || EnableSpillVGPRToAGPR) &&
1336 "Saved FP but didn't need it");
1337
1338 bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
1339 (void)BPSaved;
1340 assert((!HasBP || BPSaved) &&
1341 "Needed to save BP but didn't save it anywhere");
1342
1343 assert((HasBP || !BPSaved) && "Saved BP but didn't need it");
1344}
1345
1347 MachineBasicBlock &MBB) const {
1348 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1349 if (FuncInfo->isEntryFunction())
1350 return;
1351
1352 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1353 const SIInstrInfo *TII = ST.getInstrInfo();
1354 const SIRegisterInfo &TRI = TII->getRegisterInfo();
1356 LiveRegUnits LiveUnits;
1357 // Get the insert location for the epilogue. If there were no terminators in
1358 // the block, get the last instruction.
1360 DebugLoc DL;
1361 if (!MBB.empty()) {
1362 MBBI = MBB.getLastNonDebugInstr();
1363 if (MBBI != MBB.end())
1364 DL = MBBI->getDebugLoc();
1365
1366 MBBI = MBB.getFirstTerminator();
1367 }
1368
1369 const MachineFrameInfo &MFI = MF.getFrameInfo();
1370 uint32_t NumBytes = MFI.getStackSize();
1371 uint32_t RoundedSize = FuncInfo->isStackRealigned()
1372 ? NumBytes + MFI.getMaxAlign().value()
1373 : NumBytes;
1374 const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
1375 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1376 bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
1377
1378 if (RoundedSize != 0) {
1379 if (TRI.hasBasePointer(MF)) {
1380 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
1381 .addReg(TRI.getBaseRegister())
1383 } else if (hasFP(MF)) {
1384 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
1385 .addReg(FramePtrReg)
1387 }
1388 }
1389
1390 Register FramePtrRegScratchCopy;
1391 Register SGPRForFPSaveRestoreCopy =
1392 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1393 if (FPSaved) {
1394    // CSR spill restores should use FP as the base register. If
1395    // SGPRForFPSaveRestoreCopy is not set, restore the previous value of FP
1396    // into a new scratch register and copy it to FP later, once the other
1397    // registers have been restored from the current stack frame.
1398 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
1399 if (SGPRForFPSaveRestoreCopy) {
1400 LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
1401 } else {
1402 FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
1403 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1404 if (!FramePtrRegScratchCopy)
1405 report_fatal_error("failed to find free scratch register");
1406
1407 LiveUnits.addReg(FramePtrRegScratchCopy);
1408 }
1409
1410 emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
1411 FramePtrRegScratchCopy);
1412 }
1413
1414 if (FPSaved) {
1415 // Insert the copy to restore FP.
1416 Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
1417 : FramePtrRegScratchCopy;
1419 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
1420 .addReg(SrcReg);
1421 if (SGPRForFPSaveRestoreCopy)
1423 } else {
1424 // Insert the CSR spill restores with SP as the base register.
1425 emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits,
1426 FuncInfo->isChainFunction() ? Register() : StackPtrReg,
1427 FramePtrRegScratchCopy);
1428 }
1429}
1430
1431#ifndef NDEBUG
1433 const MachineFrameInfo &MFI = MF.getFrameInfo();
1434 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1435 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
1436 I != E; ++I) {
1437 if (!MFI.isDeadObjectIndex(I) &&
1440 return false;
1441 }
1442 }
1443
1444 return true;
1445}
1446#endif
1447
1449 int FI,
1450 Register &FrameReg) const {
1451 const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1452
1453 FrameReg = RI->getFrameRegister(MF);
1455}
1456
1458 MachineFunction &MF,
1459 RegScavenger *RS) const {
1460 MachineFrameInfo &MFI = MF.getFrameInfo();
1461
1462 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1463 const SIInstrInfo *TII = ST.getInstrInfo();
1464 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1467
1468 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
1469                               && EnableSpillVGPRToAGPR;
1470
1471 if (SpillVGPRToAGPR) {
1472 // To track the spill frame indices handled in this pass.
1473 BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
1474 BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);
1475
1476 bool SeenDbgInstr = false;
1477
1478 for (MachineBasicBlock &MBB : MF) {
1480 int FrameIndex;
1481 if (MI.isDebugInstr())
1482 SeenDbgInstr = true;
1483
1484 if (TII->isVGPRSpill(MI)) {
1485 // Try to eliminate stack used by VGPR spills before frame
1486 // finalization.
1487 unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1488 AMDGPU::OpName::vaddr);
1489 int FI = MI.getOperand(FIOp).getIndex();
1490 Register VReg =
1491 TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
1492 if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
1493 TRI->isAGPR(MRI, VReg))) {
1494 assert(RS != nullptr);
1496 RS->backward(std::next(MI.getIterator()));
1497 TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
1498 SpillFIs.set(FI);
1499 continue;
1500 }
1501 } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
1502 TII->isLoadFromStackSlot(MI, FrameIndex))
1503 if (!MFI.isFixedObjectIndex(FrameIndex))
1504 NonVGPRSpillFIs.set(FrameIndex);
1505 }
1506 }
1507
1508 // Stack slot coloring may assign different objects to the same stack slot.
1509 // If not, then the VGPR to AGPR spill slot is dead.
1510 for (unsigned FI : SpillFIs.set_bits())
1511 if (!NonVGPRSpillFIs.test(FI))
1512 FuncInfo->setVGPRToAGPRSpillDead(FI);
1513
1514 for (MachineBasicBlock &MBB : MF) {
1515 for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
1516 MBB.addLiveIn(Reg);
1517
1518 for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
1519 MBB.addLiveIn(Reg);
1520
1521 MBB.sortUniqueLiveIns();
1522
1523 if (!SpillFIs.empty() && SeenDbgInstr) {
1524        // FIXME: The dead frame indices are replaced with a null register from
1525        // the debug value instructions. We should instead update them with the
1526        // correct register value, but the register value alone may not suffice.
1527 for (MachineInstr &MI : MBB) {
1528 if (MI.isDebugValue()) {
1529 uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;
1530 if (MI.getOperand(StackOperandIdx).isFI() &&
1531 !MFI.isFixedObjectIndex(
1532 MI.getOperand(StackOperandIdx).getIndex()) &&
1533 SpillFIs[MI.getOperand(StackOperandIdx).getIndex()]) {
1534 MI.getOperand(StackOperandIdx)
1535 .ChangeToRegister(Register(), false /*isDef*/);
1536 }
1537 }
1538 }
1539 }
1540 }
1541 }
1542
1543 // At this point we've already allocated all spilled SGPRs to VGPRs if we
1544 // can. Any remaining SGPR spills will go to memory, so move them back to the
1545 // default stack.
1546 bool HaveSGPRToVMemSpill =
1547 FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
1549 "SGPR spill should have been removed in SILowerSGPRSpills");
1550
1551 // FIXME: The other checks should be redundant with allStackObjectsAreDead,
1552 // but currently hasNonSpillStackObjects is set only from source
1553 // allocas. Stack temps produced from legalization are not counted currently.
1554 if (!allStackObjectsAreDead(MFI)) {
1555 assert(RS && "RegScavenger required if spilling");
1556
1557 // Add an emergency spill slot
1558 RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
1559
1560 // If we are spilling SGPRs to memory with a large frame, we may need a
1561 // second VGPR emergency frame index.
1562 if (HaveSGPRToVMemSpill &&
1565 }
1566 }
1567}
1568
1570 MachineFunction &MF, RegScavenger *RS) const {
1571 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1572 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1575
1576 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
1577    // On gfx908, we initially reserved the highest available VGPR for the AGPR
1578    // copy. Now that RA is done, check whether there is an unused VGPR that is
1579    // lower than the one reserved earlier. If one exists, use it for the AGPR
1580    // copy instead.
1581 Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
1582 Register UnusedLowVGPR =
1583 TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
1584 if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
1585 TRI->getHWRegIndex(VGPRForAGPRCopy))) {
1586 // Reserve this newly identified VGPR (for AGPR copy)
1587 // reserved registers should already be frozen at this point
1588 // so we can avoid calling MRI.freezeReservedRegs and just use
1589 // MRI.reserveReg
1590 FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
1591 MRI.reserveReg(UnusedLowVGPR, TRI);
1592 }
1593 }
1594  // We initially reserved the highest available SGPR pair for long branches.
1595  // Now, after RA, we shift down to a lower unused one if one exists.
1596 Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
1597 Register UnusedLowSGPR =
1598 TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
1599  // If LongBranchReservedReg is null, we didn't find a long branch and never
1600  // reserved a register to begin with, so there is nothing to shift down.
1601  // Likewise, if UnusedLowSGPR is null, there is no available lower register
1602  // to use, so just keep the original one we set.
1603 if (LongBranchReservedReg && UnusedLowSGPR) {
1604 FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
1605 MRI.reserveReg(UnusedLowSGPR, TRI);
1606 }
1607}
1608
1609// The special SGPR spills, like the ones needed for FP, BP, or any reserved
1610// registers, are delayed until frame lowering.
1612 MachineFunction &MF, BitVector &SavedVGPRs,
1613 bool NeedExecCopyReservedReg) const {
1614 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1617 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1618 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1619 LiveRegUnits LiveUnits;
1620 LiveUnits.init(*TRI);
1621 // Initially mark callee saved registers as used so we will not choose them
1622 // while looking for scratch SGPRs.
1623 const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
1624 for (unsigned I = 0; CSRegs[I]; ++I)
1625 LiveUnits.addReg(CSRegs[I]);
1626
1627 const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();
1628
1629 Register ReservedRegForExecCopy = MFI->getSGPRForEXECCopy();
1630 if (NeedExecCopyReservedReg ||
1631 (ReservedRegForExecCopy &&
1632 MRI.isPhysRegUsed(ReservedRegForExecCopy, /*SkipRegMaskTest=*/true))) {
1633 MRI.reserveReg(ReservedRegForExecCopy, TRI);
1634 Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
1635 if (UnusedScratchReg) {
1636 // If found any unused scratch SGPR, reserve the register itself for Exec
1637 // copy and there is no need for any spills in that case.
1638 MFI->setSGPRForEXECCopy(UnusedScratchReg);
1639 MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg);
1640 LiveUnits.addReg(UnusedScratchReg);
1641 } else {
1642 // Needs spill.
1643 assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedRegForExecCopy) &&
1644 "Re-reserving spill slot for EXEC copy register");
1645 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedRegForExecCopy, RC,
1646 /*IncludeScratchCopy=*/false);
1647 }
1648 } else if (ReservedRegForExecCopy) {
1649 // Reset it at this point. No whole-wave copies or spills were
1650 // encountered.
1651 MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
1652 }
1653
1654 // hasFP only knows about stack objects that already exist. We're now
1655 // determining the stack slots that will be created, so we have to predict
1656 // them. Stack objects force FP usage with calls.
1657 //
1658 // Note a new VGPR CSR may be introduced if one is used for the spill, but we
1659 // don't want to report it here.
1660 //
1661 // FIXME: Is this really hasReservedCallFrame?
1662 const bool WillHaveFP =
1663 FrameInfo.hasCalls() &&
1664 (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1665
1666 if (WillHaveFP || hasFP(MF)) {
1667 Register FramePtrReg = MFI->getFrameOffsetReg();
1668 assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
1669 "Re-reserving spill slot for FP");
1670 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
1671 }
1672
1673 if (TRI->hasBasePointer(MF)) {
1674 Register BasePtrReg = TRI->getBaseRegister();
1675 assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
1676 "Re-reserving spill slot for BP");
1677 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
1678 }
1679}
1680
1681// Only report VGPRs to generic code.
1682void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
1683 BitVector &SavedVGPRs,
1684 RegScavenger *RS) const {
1685 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1686
1687 // If this is a function with the amdgpu_cs_chain[_preserve] calling
1688 // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain, then
1689 // we don't need to save and restore anything.
1690 if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
1691 return;
1692
1693 TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
1694
1695 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1696 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1697 const SIInstrInfo *TII = ST.getInstrInfo();
1698 bool NeedExecCopyReservedReg = false;
1699
1700 MachineInstr *ReturnMI = nullptr;
1701 for (MachineBasicBlock &MBB : MF) {
1702 for (MachineInstr &MI : MBB) {
1703 // TODO: Walking through all MBBs here would be a bad heuristic. Better
1704 // handle them elsewhere.
1705 if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
1706 NeedExecCopyReservedReg = true;
1707 else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
1708 MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
1709 MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
1710 (MFI->isChainFunction() &&
1711 TII->isChainCallOpcode(MI.getOpcode()))) {
1712 // We expect all returns to be the same size.
1713 assert(!ReturnMI ||
1714 (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
1715 count_if(ReturnMI->operands(), [](auto Op) { return Op.isReg(); })));
1716 ReturnMI = &MI;
1717 }
1718 }
1719 }
1720
1721 SmallVector<Register> SortedWWMVGPRs;
1722 for (Register Reg : MFI->getWWMReservedRegs()) {
1723 // The shift-back is needed only for the VGPRs used for SGPR spills, which
1724 // are 32 bits in size. The SIPreAllocateWWMRegs pass can add tuples to the
1725 // WWM reserved registers.
1726 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1727 if (TRI->getRegSizeInBits(*RC) != 32)
1728 continue;
1729 SortedWWMVGPRs.push_back(Reg);
1730 }
1731
1732 sort(SortedWWMVGPRs, std::greater<Register>());
1733 MFI->shiftWwmVGPRsToLowestRange(MF, SortedWWMVGPRs, SavedVGPRs);
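// Repacking these 32-bit WWM VGPRs into the lowest available range presumably
// keeps the set of VGPRs that must be saved and counted against the function's
// VGPR budget as compact as possible.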
1734
1735 if (MFI->isEntryFunction())
1736 return;
1737
1738 if (MFI->isWholeWaveFunction()) {
1739 // In practice, all the VGPRs are WWM registers, and we will need to save at
1740 // least their inactive lanes. Add them to WWMReservedRegs.
1741 assert(!NeedExecCopyReservedReg &&
1742 "Whole wave functions can use the reg mapped for their i1 argument");
1743
1744 // FIXME: Be more efficient!
1745 unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256;
1746 for (MCRegister Reg :
1747 AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs))
1748 if (MF.getRegInfo().isPhysRegModified(Reg)) {
1749 MFI->reserveWWMRegister(Reg);
1750 MF.begin()->addLiveIn(Reg);
1751 }
1752 MF.begin()->sortUniqueLiveIns();
1753 }
1754
1755 // Remove any VGPRs used in the return value because these do not need to be saved.
1756 // This prevents CSR restore from clobbering return VGPRs.
1757 if (ReturnMI) {
1758 for (auto &Op : ReturnMI->operands()) {
1759 if (Op.isReg())
1760 SavedVGPRs.reset(Op.getReg());
1761 }
1762 }
1763
1764 // Create the stack objects for WWM registers now.
1765 for (Register Reg : MFI->getWWMReservedRegs()) {
1766 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1767 MFI->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
1768 TRI->getSpillAlign(*RC));
1769 }
1770
1771 // Ignore the SGPRs the default implementation found.
1772 SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
1773
1774 // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
1775 // On gfx908 there are no direct AGPR loads and stores, so spilling an AGPR
1776 // also requires a temporary VGPR.
1777 if (!ST.hasGFX90AInsts())
1778 SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
1779
1780 determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);
1781
1782 // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
1783 // allow the default insertion to handle them.
1784 for (auto &Reg : MFI->getWWMSpills())
1785 SavedVGPRs.reset(Reg.first);
1786}
1787
1788void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
1789 BitVector &SavedRegs,
1790 RegScavenger *RS) const {
1791 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1792 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1793 if (MFI->isEntryFunction())
1794 return;
1795
1796 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1797 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1798
1799 // The SP is specifically managed and we don't want extra spills of it.
1800 SavedRegs.reset(MFI->getStackPtrOffsetReg());
1801
1802 const BitVector AllSavedRegs = SavedRegs;
1803 SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1804
1805 // We have to anticipate introducing CSR VGPR spills, or a spill of the
1806 // caller-saved VGPR reserved for SGPR spills, since we now always create a
1807 // stack entry for it even if there are no other stack objects, because a FP
1808 // is required whenever there is a call and a stack. A VGPR is allocated for
1809 // SGPR spills whenever there are any SGPR spills, whether CSR or otherwise.
1810 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1811 const bool WillHaveFP =
1812 FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
1813
1814 // FP will be specially managed like SP.
1815 if (WillHaveFP || hasFP(MF))
1816 SavedRegs.reset(MFI->getFrameOffsetReg());
1817
1818 // The return address use in the return instruction is hidden by the
1819 // SI_RETURN pseudo. Given that, and since IPRA computes actual register
1820 // usage and does not use the CSR list, the clobbering of the return address
1821 // by function calls (D117243) or otherwise (D120922) is not seen by IPRA's
1822 // register usage collection. Setting these bits ensures the return address
1823 // is saved and restored in those scenarios.
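// (On this target the return address lives in a 64-bit SGPR pair; the sub0 and
// sub1 bits set below cover its two 32-bit halves.)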
1824 const MachineRegisterInfo &MRI = MF.getRegInfo();
1825 Register RetAddrReg = TRI->getReturnAddressReg(MF);
1826 if (!MFI->isEntryFunction() &&
1827 (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
1828 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
1829 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
1830 }
1831}
1832
1833static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
1834 const GCNSubtarget &ST,
1835 std::vector<CalleeSavedInfo> &CSI,
1836 unsigned &MinCSFrameIndex,
1837 unsigned &MaxCSFrameIndex) {
1838 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1839 MachineFrameInfo &MFI = MF.getFrameInfo();
1840 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1841
1842 assert(
1843 llvm::is_sorted(CSI,
1844 [](const CalleeSavedInfo &A, const CalleeSavedInfo &B) {
1845 return A.getReg() < B.getReg();
1846 }) &&
1847 "Callee saved registers not sorted");
1848
1849 auto CanUseBlockOps = [&](const CalleeSavedInfo &CSI) {
1850 return !CSI.isSpilledToReg() &&
1851 TRI->getPhysRegBaseClass(CSI.getReg()) == &AMDGPU::VGPR_32RegClass &&
1852 !FuncInfo->isWWMReservedRegister(CSI.getReg());
1853 };
1854
1855 auto CSEnd = CSI.end();
1856 for (auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) {
1857 Register Reg = CSIt->getReg();
1858 if (!CanUseBlockOps(*CSIt))
1859 continue;
1860
1861 // Find all the regs that will fit in a 32-bit mask starting at the current
1862 // reg and build said mask. It should have 1 for every register that's
1863 // included, with the current register as the least significant bit.
1864 uint32_t Mask = 1;
1865 CSEnd = std::remove_if(
1866 CSIt + 1, CSEnd, [&](const CalleeSavedInfo &CSI) -> bool {
1867 if (CanUseBlockOps(CSI) && CSI.getReg() < Reg + 32) {
1868 Mask |= 1 << (CSI.getReg() - Reg);
1869 return true;
1870 } else {
1871 return false;
1872 }
1873 });
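// For example, if the eligible callee-saved registers are v40, v41, and v43,
// the scan starting at v40 yields Mask = 0b1011 (bit 0 = v40, bit 1 = v41,
// bit 3 = v43) and removes v41 and v43 from the remaining list.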
1874
1875 const TargetRegisterClass *BlockRegClass = TRI->getRegClassForBlockOp(MF);
1876 Register RegBlock =
1877 TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass);
1878 if (!RegBlock) {
1879 // We couldn't find a super register for the block. This can happen if
1880 // the register we started with is too high (e.g. v232 if the maximum is
1881 // v255). We therefore try to get the last register block and figure out
1882 // the mask from there.
1883 Register LastBlockStart =
1884 AMDGPU::VGPR0 + alignDown(Reg - AMDGPU::VGPR0, 32);
1885 RegBlock =
1886 TRI->getMatchingSuperReg(LastBlockStart, AMDGPU::sub0, BlockRegClass);
1887 assert(RegBlock && TRI->isSubRegister(RegBlock, Reg) &&
1888 "Couldn't find super register");
1889 int RegDelta = Reg - LastBlockStart;
1890 assert(RegDelta > 0 && llvm::countl_zero(Mask) >= RegDelta &&
1891 "Bad shift amount");
1892 Mask <<= RegDelta;
1893 }
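// For example, with 256 addressable VGPRs a group starting at v232 has no
// 32-register block rooted at v232, so the block starting at v224 is used
// instead and the mask is shifted left by RegDelta = 8 so each bit still
// lines up with its register within that block.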
1894
1895 FuncInfo->setMaskForVGPRBlockOps(RegBlock, Mask);
1896
1897 // The stack objects can be a bit smaller than the register block if we know
1898 // some of the high bits of Mask are 0. This may happen often with calling
1899 // conventions where the caller and callee-saved VGPRs are interleaved at
1900 // a small boundary (e.g. 8 or 16).
1901 int UnusedBits = llvm::countl_zero(Mask);
1902 unsigned BlockSize = TRI->getSpillSize(*BlockRegClass) - UnusedBits * 4;
1903 int FrameIdx =
1904 MFI.CreateStackObject(BlockSize, TRI->getSpillAlign(*BlockRegClass),
1905 /*isSpillSlot=*/true);
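// For example, assuming the 1024-bit block register class used with
// SI_BLOCK_SPILL_V1024_SAVE later in this file (32 registers, 128 bytes per
// lane), a mask of 0x00FF has 24 unused high bits and the slot shrinks to
// 128 - 24 * 4 = 32 bytes.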
1906 if ((unsigned)FrameIdx < MinCSFrameIndex)
1907 MinCSFrameIndex = FrameIdx;
1908 if ((unsigned)FrameIdx > MaxCSFrameIndex)
1909 MaxCSFrameIndex = FrameIdx;
1910
1911 CSIt->setFrameIdx(FrameIdx);
1912 CSIt->setReg(RegBlock);
1913 }
1914 CSI.erase(CSEnd, CSI.end());
1915}
1916
1917bool SIFrameLowering::assignCalleeSavedSpillSlots(
1918 MachineFunction &MF, const TargetRegisterInfo *TRI,
1919 std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
1920 unsigned &MaxCSFrameIndex) const {
1921 if (CSI.empty())
1922 return true; // Early exit if no callee saved registers are modified!
1923
1924 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1925 bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();
1926
1927 if (UseVGPRBlocks)
1928 assignSlotsUsingVGPRBlocks(MF, ST, CSI, MinCSFrameIndex, MaxCSFrameIndex);
1929
1930 return assignCalleeSavedSpillSlots(MF, TRI, CSI) || UseVGPRBlocks;
1931}
1932
1935 std::vector<CalleeSavedInfo> &CSI) const {
1936 if (CSI.empty())
1937 return true; // Early exit if no callee saved registers are modified!
1938
1939 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1940 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1941 const SIRegisterInfo *RI = ST.getRegisterInfo();
1942 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1943 Register BasePtrReg = RI->getBaseRegister();
1944 Register SGPRForFPSaveRestoreCopy =
1945 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1946 Register SGPRForBPSaveRestoreCopy =
1947 FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
1948 if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
1949 return false;
1950
1951 unsigned NumModifiedRegs = 0;
1952
1953 if (SGPRForFPSaveRestoreCopy)
1954 NumModifiedRegs++;
1955 if (SGPRForBPSaveRestoreCopy)
1956 NumModifiedRegs++;
1957
1958 for (auto &CS : CSI) {
1959 if (CS.getReg() == FramePtrReg.asMCReg() && SGPRForFPSaveRestoreCopy) {
1960 CS.setDstReg(SGPRForFPSaveRestoreCopy);
1961 if (--NumModifiedRegs)
1962 break;
1963 } else if (CS.getReg() == BasePtrReg.asMCReg() &&
1964 SGPRForBPSaveRestoreCopy) {
1965 CS.setDstReg(SGPRForBPSaveRestoreCopy);
1966 if (--NumModifiedRegs)
1967 break;
1968 }
1969 }
1970
1971 return false;
1972}
1973
1974bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
1975 const MachineFunction &MF) const {
1976
1977 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1978 const MachineFrameInfo &MFI = MF.getFrameInfo();
1979 const SIInstrInfo *TII = ST.getInstrInfo();
1980 uint64_t EstStackSize = MFI.estimateStackSize(MF);
1981 uint64_t MaxOffset = EstStackSize - 1;
1982
1983 // We need the emergency stack slots to be allocated in range of the
1984 // MUBUF/flat scratch immediate offset from the base register, so assign these
1985 // first at the incoming SP position.
1986 //
1987 // TODO: We could try sorting the objects to find a hole in the first bytes
1988 // rather than allocating as close as possible. This could save a lot of
1989 // space on frames with alignment requirements.
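// For reference: the MUBUF unsigned immediate offset field is typically 12
// bits (0..4095 bytes), while flat scratch has its own, subtarget-dependent
// signed offset limit; both legality checks are queried below.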
1990 if (ST.enableFlatScratch()) {
1991 if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1992 SIInstrFlags::FlatScratch))
1993 return false;
1994 } else {
1995 if (TII->isLegalMUBUFImmOffset(MaxOffset))
1996 return false;
1997 }
1998
1999 return true;
2000}
2001
2002bool SIFrameLowering::spillCalleeSavedRegisters(
2003 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2004 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2005 MachineFunction *MF = MBB.getParent();
2006 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2007 if (!ST.useVGPRBlockOpsForCSR())
2008 return false;
2009
2010 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
2011 SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
2012 const SIInstrInfo *TII = ST.getInstrInfo();
2014
2015 const TargetRegisterClass *BlockRegClass =
2016 static_cast<const SIRegisterInfo *>(TRI)->getRegClassForBlockOp(*MF);
2017 for (const CalleeSavedInfo &CS : CSI) {
2018 Register Reg = CS.getReg();
2019 if (!BlockRegClass->contains(Reg) ||
2020 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2021 spillCalleeSavedRegister(MBB, MI, CS, TII, TRI);
2022 continue;
2023 }
2024
2025 // Build a scratch block store.
2026 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2027 int FrameIndex = CS.getFrameIdx();
2028 MachinePointerInfo PtrInfo =
2029 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2030 MachineMemOperand *MMO =
2031 MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
2032 FrameInfo.getObjectSize(FrameIndex),
2033 FrameInfo.getObjectAlign(FrameIndex));
2034
2035 BuildMI(MBB, MI, MI->getDebugLoc(),
2036 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
2037 .addReg(Reg, getKillRegState(false))
2038 .addFrameIndex(FrameIndex)
2039 .addReg(FuncInfo->getStackPtrOffsetReg())
2040 .addImm(0)
2041 .addImm(Mask)
2042 .addMemOperand(MMO);
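// The mask operand tells the expansion of the block-spill pseudo which of the
// 32 registers in the block actually need to be written, so presumably only
// the masked registers are stored into the (possibly shrunken) stack slot.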
2043
2044 FuncInfo->setHasSpilledVGPRs();
2045
2046 // Add the register to the liveins. This is necessary because if any of the
2047 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2048 // then the whole block will be marked as reserved and `updateLiveness` will
2049 // skip it.
2050 MBB.addLiveIn(Reg);
2051 }
2052 MBB.sortUniqueLiveIns();
2053
2054 return true;
2055}
2056
2057bool SIFrameLowering::restoreCalleeSavedRegisters(
2058 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2059 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2060 MachineFunction *MF = MBB.getParent();
2061 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2062 if (!ST.useVGPRBlockOpsForCSR())
2063 return false;
2064
2065 SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
2066 MachineFrameInfo &MFI = MF->getFrameInfo();
2067 const SIInstrInfo *TII = ST.getInstrInfo();
2068 const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
2069 const TargetRegisterClass *BlockRegClass = SITRI->getRegClassForBlockOp(*MF);
2070 for (const CalleeSavedInfo &CS : reverse(CSI)) {
2071 Register Reg = CS.getReg();
2072 if (!BlockRegClass->contains(Reg) ||
2073 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2074 restoreCalleeSavedRegister(MBB, MI, CS, TII, TRI);
2075 continue;
2076 }
2077
2078 // Build a scratch block load.
2079 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2080 int FrameIndex = CS.getFrameIdx();
2081 MachinePointerInfo PtrInfo =
2082 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2083 MachineMemOperand *MMO = MF->getMachineMemOperand(
2084 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
2085 MFI.getObjectAlign(FrameIndex));
2086
2087 auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
2088 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE), Reg)
2089 .addFrameIndex(FrameIndex)
2090 .addReg(FuncInfo->getStackPtrOffsetReg())
2091 .addImm(0)
2092 .addImm(Mask)
2093 .addMemOperand(MMO);
2094 SITRI->addImplicitUsesForBlockCSRLoad(MIB, Reg);
2095
2096 // Add the register to the liveins. This is necessary because if any of the
2097 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2098 // then the whole block will be marked as reserved and `updateLiveness` will
2099 // skip it.
2100 MBB.addLiveIn(Reg);
2101 }
2102
2103 MBB.sortUniqueLiveIns();
2104 return true;
2105}
2106
2107MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
2108 MachineFunction &MF,
2109 MachineBasicBlock &MBB,
2110 MachineBasicBlock::iterator I) const {
2111 int64_t Amount = I->getOperand(0).getImm();
2112 if (Amount == 0)
2113 return MBB.erase(I);
2114
2115 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2116 const SIInstrInfo *TII = ST.getInstrInfo();
2117 const DebugLoc &DL = I->getDebugLoc();
2118 unsigned Opc = I->getOpcode();
2119 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
2120 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2121
2122 if (!hasReservedCallFrame(MF)) {
2123 Amount = alignTo(Amount, getStackAlign());
2124 assert(isUInt<32>(Amount) && "exceeded stack address space size");
2127
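// SP for scratch holds a swizzled, per-lane byte offset, so the per-lane
// adjustment is scaled by getScratchScaleFactor (presumably the wave size for
// MUBUF scratch and 1 with flat scratch); e.g. a 16-byte adjustment under
// wave64 then moves SP by 1024.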
2128 Amount *= getScratchScaleFactor(ST);
2129 if (IsDestroy)
2130 Amount = -Amount;
2131 auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
2132 .addReg(SPReg)
2133 .addImm(Amount);
2134 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
2135 } else if (CalleePopAmount != 0) {
2136 llvm_unreachable("is this used?");
2137 }
2138
2139 return MBB.erase(I);
2140}
2141
2142/// Returns true if the frame will require a reference to the stack pointer.
2143///
2144/// This is the set of conditions common to setting up the stack pointer in a
2145/// kernel, and for using a frame pointer in a callable function.
2146///
2147/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
2148/// references SP.
2149static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
2150 return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
2151}
2152
2153// The FP for kernels is always known to be 0, so we never really need to set
2154// up an explicit register for it. However, DisableFramePointerElim will force
2155// us to use a register for it.
2156bool SIFrameLowering::hasFPImpl(const MachineFunction &MF) const {
2157 const MachineFrameInfo &MFI = MF.getFrameInfo();
2158
2159 // For entry & chain functions we can use an immediate offset in most cases,
2160 // so the presence of calls doesn't imply we need a distinct frame pointer.
2161 if (MFI.hasCalls() &&
2162 !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
2163 !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) {
2164 // All offsets are unsigned, so need to be addressed in the same direction
2165 // as stack growth.
2166
2167 // FIXME: This function is pretty broken, since it can be called before the
2168 // frame layout is determined or CSR spills are inserted.
2169 return MFI.getStackSize() != 0;
2170 }
2171
2172 return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
2173 MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
2174 MF) ||
2175 mayReserveScratchForCWSR(MF) ||
2176 MF.getTarget().Options.DisableFramePointerElim(MF);
2177}
2178
2179bool SIFrameLowering::mayReserveScratchForCWSR(
2180 const MachineFunction &MF) const {
2181 return MF.getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled() &&
2184}
2185
2186// This is essentially a reduced version of hasFP for entry functions. Since the
2187// stack pointer is known 0 on entry to kernels, we never really need an FP
2188// register. We may need to initialize the stack pointer depending on the frame
2189// properties, which logically overlaps many of the cases where an ordinary
2190// function would require an FP.
2191// Also used for chain functions. While not technically entry functions, chain
2192// functions may need to set up a stack pointer in some situations.
2193bool SIFrameLowering::requiresStackPointerReference(
2194 const MachineFunction &MF) const {
2195 // Callable functions always require a stack pointer reference.
2198 "only expected to call this for entry points and chain functions");
2199
2200 const MachineFrameInfo &MFI = MF.getFrameInfo();
2201
2202 // Entry points ordinarily don't need to initialize SP. We have to set it up
2203 // for callees if there are any. Also note tail calls are impossible/don't
2204 // make any sense for kernels.
2205 if (MFI.hasCalls())
2206 return true;
2207
2208 // We still need to initialize the SP if we're doing anything weird that
2209 // references the SP, like variable sized stack objects.
2210 return frameTriviallyRequiresSP(MFI);
2211}