//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <optional>
#include <vector>

enum { MAX_LANES = 64 };

using namespace llvm;

// TODO -- delete this flag once we have more robust mechanisms to allocate the
// optimal RC for Opc and Dest of MFMA. In particular, there are high RP cases
// where it is better to produce the VGPR form (e.g. if there are VGPR users
// of the MFMA result).
static cl::opt<bool> MFMAVGPRForm(
    "amdgpu-mfma-vgpr-form", cl::Hidden,
    cl::desc("Whether to force use VGPR for Opc and Dest of MFMA. If "
             "unspecified, default to compiler heuristics"),
    cl::init(false));

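// Recover the GCNTargetMachine for this subtarget via its TargetLowering.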
const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
  const SITargetLowering *TLI = STI->getTargetLowering();
  return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine());
}

SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
                                             const GCNSubtarget *STI)
    : AMDGPUMachineFunction(F, *STI), Mode(F, *STI), GWSResourcePSV(getTM(STI)),
      UserSGPRInfo(F, *STI), WorkGroupIDX(false), WorkGroupIDY(false),
      WorkGroupIDZ(false), WorkGroupInfo(false), LDSKernelId(false),
      PrivateSegmentWaveByteOffset(false), WorkItemIDX(false),
      WorkItemIDY(false), WorkItemIDZ(false), ImplicitArgPtr(false),
      GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0),
      IsWholeWaveFunction(F.getCallingConv() ==
                          CallingConv::AMDGPU_Gfx_WholeWave) {
  const GCNSubtarget &ST = *STI;
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);
  MaxNumWorkGroups = ST.getMaxNumWorkGroups(F);
  assert(MaxNumWorkGroups.size() == 3);

  // Temporarily check both the attribute and the subtarget feature, until the
  // latter is completely removed.
  DynamicVGPRBlockSize = AMDGPU::getDynamicVGPRBlockSize(F);
  if (DynamicVGPRBlockSize == 0 && ST.isDynamicVGPREnabled())
    DynamicVGPRBlockSize = ST.getDynamicVGPRBlockSize();

  Occupancy = ST.computeOccupancy(F, getLDSSize()).second;
  CallingConv::ID CC = F.getCallingConv();

  VRegFlags.reserve(1024);

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  MayNeedAGPRs = ST.hasMAIInsts();
  if (ST.hasGFX90AInsts()) {
    // FIXME: MayNeedAGPRs is a misnomer for how this is used. MFMA selection
    // should be separated from availability of AGPRs.
    if (MFMAVGPRForm ||
        (ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
         !mayUseAGPRs(F)))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  if (AMDGPU::isChainCC(CC)) {
    // Chain functions don't receive an SP from their caller, but are free to
    // set one up. For now, we can use s32 to match what amdgpu_gfx functions
    // would use if called, but this can be revisited.
    // FIXME: Only reserve this if we actually need it.
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51;

    ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);

    ImplicitArgPtr = false;
  } else if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx &&
        CC != CallingConv::AMDGPU_Gfx_WholeWave)
      ArgInfo = FixedABIFunctionInfo;

    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; other registers
      // are required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
          ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign =
        std::max(ST.getAlignmentForImplicitArgPtr(), MaxKernArgAlign);
  }

  if (!AMDGPU::isGraphics(CC) ||
      ((CC == CallingConv::AMDGPU_CS || CC == CallingConv::AMDGPU_Gfx) &&
       ST.hasArchitectedSGPRs())) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;
  }

  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
      LDSKernelId = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  MaxMemoryClusterDWords = F.getFnAttributeAsParsedInteger(
      "amdgpu-max-memory-cluster-dwords", DefaultMemoryClusterDWordsLimit);

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times. For now, reserve the highest available VGPR.
  // After RA, shift it to the lowest available unused VGPR if one exists.
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    VGPRForAGPRCopy =
        AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
  }
}

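// Make a functionally equivalent copy of this MachineFunctionInfo for DestMF.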
MachineFunctionInfo *SIMachineFunctionInfo::clone(
    BumpPtrAllocator &Allocator, MachineFunction &DestMF,
    const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
    const {
  return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
}

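// Clamp the cached occupancy to the limits implied by the function's
// waves-per-EU and workgroup sizes.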
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithWorkGroupSizes(MF).second);
}

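// Each add* helper below reserves the next user SGPR (or aligned SGPR tuple)
// for one of the fixed ABI inputs and records the assignment in ArgInfo.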
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
    const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(
    const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addPrivateSegmentSize(
    const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentSize = ArgDescriptor::createRegister(getNextUserSGPR());
  NumUserSGPRs += 1;
  return ArgInfo.PrivateSegmentSize.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(
    const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

Register SIMachineFunctionInfo::addLDSKernelId() {
  ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR());
  NumUserSGPRs += 1;
  return ArgInfo.LDSKernelId.getRegister();
}

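// Reserve user SGPRs into which the hardware preloads the kernel argument at
// KernArgIdx, preferring an aligned SGPR tuple of class RC and falling back to
// a list of individual SGPRs that get merged later.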
SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg(
    const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
    unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs) {
  auto [It, Inserted] = ArgInfo.PreloadKernArgs.try_emplace(KernArgIdx);
  assert(Inserted && "Preload kernel argument allocated twice.");
  NumUserSGPRs += PaddingSGPRs;
  // If the available register tuples are aligned with the kernarg to be
  // preloaded, use that register; otherwise we need to use a set of SGPRs and
  // merge them.
  if (!ArgInfo.FirstKernArgPreloadReg)
    ArgInfo.FirstKernArgPreloadReg = getNextUserSGPR();
  Register PreloadReg =
      TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC);
  auto &Regs = It->second.Regs;
  if (PreloadReg &&
      (RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) {
    Regs.push_back(PreloadReg);
    NumUserSGPRs += AllocSizeDWord;
  } else {
    Regs.reserve(AllocSizeDWord);
    for (unsigned I = 0; I < AllocSizeDWord; ++I) {
      Regs.push_back(getNextUserSGPR());
      NumUserSGPRs++;
    }
  }

  // Track the actual number of SGPRs that HW will preload to.
  UserSGPRInfo.allocKernargPreloadSGPRs(AllocSizeDWord + PaddingSGPRs);
  return &Regs;
}

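// Create a stack slot in which the inactive lanes of the WWM register VGPR can
// be saved, unless this function provably never needs to preserve them.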
void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR,
                                             uint64_t Size, Align Alignment) {
  // Skip if it is an entry function or the register is already added.
  if (isEntryFunction() || WWMSpills.count(VGPR))
    return;

  // Skip if this is a function with the amdgpu_cs_chain or
  // amdgpu_cs_chain_preserve calling convention and this is a scratch register.
  // We never need to allocate a spill for these because we don't even need to
  // restore the inactive lanes for them (they're scratchier than the usual
  // scratch registers). We only need to do this if we have calls to
  // llvm.amdgcn.cs.chain (otherwise there's no one to save them for, since
  // chain functions do not return) and the function did not contain a call to
  // llvm.amdgcn.init.whole.wave (since in that case there are no inactive lanes
  // when entering the function).
  if (isChainFunction() &&
      (!MF.getFrameInfo().hasTailCall() || hasInitWholeWave()) &&
      isChainScratchRegister(VGPR))
    return;

  WWMSpills.insert(std::make_pair(
      VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment)));
}

// Separate out the callee-saved and scratch registers.
void SIMachineFunctionInfo::splitWWMSpillRegisters(
    MachineFunction &MF,
    SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
    SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const {
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  for (auto &Reg : WWMSpills) {
    if (isCalleeSavedReg(CSRegs, Reg.first))
      CalleeSavedRegs.push_back(Reg);
    else
      ScratchRegs.push_back(Reg);
  }
}

bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) const {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

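// Shift the given WWM VGPRs down to the lowest unused physical VGPRs and
// update every structure that refers to them (reserved registers,
// SpillPhysVGPRs, block live-ins, and the saved-VGPR set).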
void SIMachineFunctionInfo::shiftWwmVGPRsToLowestRange(
    MachineFunction &MF, SmallVectorImpl<Register> &WWMVGPRs,
    BitVector &SavedVGPRs) {
  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  for (unsigned I = 0, E = WWMVGPRs.size(); I < E; ++I) {
    Register Reg = WWMVGPRs[I];
    Register NewReg =
        TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (!NewReg || NewReg >= Reg)
      break;

    MRI.replaceRegWith(Reg, NewReg);

    // Update various tables with the new VGPR.
    WWMVGPRs[I] = NewReg;
    WWMReservedRegs.remove(Reg);
    WWMReservedRegs.insert(NewReg);
    MRI.reserveReg(NewReg, TRI);

    // Replace the register in SpillPhysVGPRs. This is needed to look for free
    // lanes while spilling special SGPRs like FP, BP, etc. during PEI.
    auto *RegItr = llvm::find(SpillPhysVGPRs, Reg);
    if (RegItr != SpillPhysVGPRs.end()) {
      unsigned Idx = std::distance(SpillPhysVGPRs.begin(), RegItr);
      SpillPhysVGPRs[Idx] = NewReg;
    }

    // The generic `determineCalleeSaves` might have set the old register if it
    // is in the CSR range.
    SavedVGPRs.reset(Reg);

    for (MachineBasicBlock &MBB : MF) {
      MBB.removeLiveIn(Reg);
      MBB.sortUniqueLiveIns();
    }

    Reg = NewReg;
  }
}

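// Assign one lane of a virtual VGPR to the SGPR spill described by frame index
// FI; a fresh virtual VGPR is created when LaneIndex is 0.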
bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    LaneVGPR = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    SpillVGPRs.push_back(LaneVGPR);
  } else {
    LaneVGPR = SpillVGPRs.back();
  }

  SGPRSpillsToVirtualVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
  return true;
}

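// Same as above, but uses a reserved physical VGPR so the lanes survive into
// the prolog/epilog; fails if no unused VGPR is left.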
bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex, bool IsPrologEpilog) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    // Find the highest available register if called before RA to ensure the
    // lowest registers are available for allocation. The LaneVGPR, in that
    // case, will be shifted back to the lowest range after VGPR allocation.
    LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF,
                                       !IsPrologEpilog);
    if (LaneVGPR == AMDGPU::NoRegister) {
      // We have no VGPRs left for spilling SGPRs. Reset because we will not
      // partially spill the SGPR to VGPRs.
      SGPRSpillsToPhysicalVGPRLanes.erase(FI);
      return false;
    }

    if (IsPrologEpilog)
      allocateWWMSpill(MF, LaneVGPR);

    reserveWWMRegister(LaneVGPR);
    for (MachineBasicBlock &MBB : MF) {
      MBB.addLiveIn(LaneVGPR);
      MBB.sortUniqueLiveIns();
    }
    SpillPhysVGPRs.push_back(LaneVGPR);
  } else {
    LaneVGPR = SpillPhysVGPRs.back();
  }

  SGPRSpillsToPhysicalVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
  return true;
}

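// Reserve one VGPR lane per dword of the SGPR spill slot FI, in either virtual
// or reserved physical VGPRs. Returns false (and rolls back) if the lanes
// cannot all be allocated.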
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(
    MachineFunction &MF, int FI, bool SpillToPhysVGPRLane,
    bool IsPrologEpilog) {
  std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
      SpillToPhysVGPRLane ? SGPRSpillsToPhysicalVGPRLanes[FI]
                          : SGPRSpillsToVirtualVGPRLanes[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
         "not spilling SGPRs to VGPRs");

  unsigned &NumSpillLanes = SpillToPhysVGPRLane ? NumPhysicalVGPRSpillLanes
                                                : NumVirtualVGPRSpillLanes;

  for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
    unsigned LaneIndex = (NumSpillLanes % WaveSize);

    bool Allocated = SpillToPhysVGPRLane
                         ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex,
                                                             IsPrologEpilog)
                         : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
    if (!Allocated) {
      NumSpillLanes -= I;
      return false;
    }
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    MRI.reserveReg(*NextSpillReg, TRI);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

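// Release dead spill frame indices. If ResetSGPRSpillStackIDs is true, any
// remaining SGPR spill slots are moved back to the default stack; returns true
// if such a slot was found, i.e. some SGPR will be spilled to memory.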
bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from the function frame, but keep FP & BP since
  // their spills haven't been inserted yet. Also remove the frame indices from
  // the `SGPRSpillsToVirtualVGPRLanes` map; otherwise later passes such as
  // "stack slot coloring" could re-map the freed indices and cause unexpected
  // side effects and bugs.
  for (auto &R : make_early_inc_range(SGPRSpillsToVirtualVGPRLanes)) {
    MFI.RemoveStackObject(R.first);
    SGPRSpillsToVirtualVGPRLanes.erase(R.first);
  }

  // Remove the dead frame indices of CSR SGPRs which are spilled to physical
  // VGPR lanes during the SILowerSGPRSpills pass.
  if (!ResetSGPRSpillStackIDs) {
    for (auto &R : make_early_inc_range(SGPRSpillsToPhysicalVGPRLanes)) {
      MFI.RemoveStackObject(R.first);
      SGPRSpillsToPhysicalVGPRLanes.erase(R.first);
    }
  }
  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;
         ++I) {
      if (!checkIndexInPrologEpilogSGPRSpills(I)) {
        if (MFI.getStackID(I) == TargetStackID::SGPRSpill) {
          MFI.setStackID(I, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}

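// Lazily create and cache the frame index used by the register scavenger for
// an emergency SGPR spill slot.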
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;

  ScavengeFI =
      MFI.CreateStackObject(TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
                            TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  return *ScavengeFI;
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(Register Reg) {
  VRegFlags.grow(Reg);
}

void SIMachineFunctionInfo::MRI_NoteCloneVirtualRegister(Register NewReg,
                                                         Register SrcReg) {
  VRegFlags.grow(NewReg);
  VRegFlags[NewReg] = VRegFlags[SrcReg];
}

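// Return the SGPR holding the low 32 bits of the GIT (global information
// table) pointer on AMDPAL; merged shaders on GFX9+ receive it in s8 instead
// of s0.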
Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

static std::optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](std::optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  // TODO: Need to serialize kernarg preloads.
  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return std::nullopt;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      GDSSize(MFI.getGDSSize()), DynLDSAlign(MFI.getDynLDSAlign()),
      IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      NumWaveDispatchSGPRs(MFI.getNumWaveDispatchSGPRs()),
      NumWaveDispatchVGPRs(MFI.getNumWaveDispatchVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      BytesInStackArgArea(MFI.getBytesInStackArgArea()),
      ReturnsVoid(MFI.returnsVoid()),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
      PSInputAddr(MFI.getPSInputAddr()), PSInputEnable(MFI.getPSInputEnable()),
      MaxMemoryClusterDWords(MFI.getMaxMemoryClusterDWords()),
      Mode(MFI.getMode()), HasInitWholeWave(MFI.hasInitWholeWave()),
      IsWholeWaveFunction(MFI.isWholeWaveFunction()),
      DynamicVGPRBlockSize(MFI.getDynamicVGPRBlockSize()),
      ScratchReservedForDynamicVGPRs(MFI.getScratchReservedForDynamicVGPRs()) {
  for (Register Reg : MFI.getSGPRSpillPhysVGPRs())
    SpillPhysVGPRS.push_back(regToString(Reg, TRI));

  for (Register Reg : MFI.getWWMReservedRegs())
    WWMReservedRegs.push_back(regToString(Reg, TRI));

  if (MFI.getLongBranchReservedReg())
    LongBranchReservedReg = regToString(MFI.getLongBranchReservedReg(), TRI);
  if (MFI.getVGPRForAGPRCopy())
    VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);

  if (MFI.getSGPRForEXECCopy())
    SGPRForEXECCopy = regToString(MFI.getSGPRForEXECCopy(), TRI);

  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

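// Restore the fields serialized in MIR YAML back into this
// MachineFunctionInfo. Returns true and fills Error/SourceRange if a field
// (e.g. the scavenge frame index) cannot be parsed.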
bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
    SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
  LDSSize = YamlMFI.LDSSize;
  GDSSize = YamlMFI.GDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  PSInputAddr = YamlMFI.PSInputAddr;
  PSInputEnable = YamlMFI.PSInputEnable;
  MaxMemoryClusterDWords = YamlMFI.MaxMemoryClusterDWords;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
  NumWaveDispatchSGPRs = YamlMFI.NumWaveDispatchSGPRs;
  NumWaveDispatchVGPRs = YamlMFI.NumWaveDispatchVGPRs;
  BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
  ReturnsVoid = YamlMFI.ReturnsVoid;
  IsWholeWaveFunction = YamlMFI.IsWholeWaveFunction;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", {}, {});
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = std::nullopt;
  }
  return false;
}

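// A function may use AGPRs unless its "amdgpu-agpr-alloc" attribute requests
// zero of them.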
bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
  auto [MinNumAGPR, MaxNumAGPR] =
      AMDGPU::getIntegerPairAttribute(F, "amdgpu-agpr-alloc", {~0u, ~0u},
                                      /*OnlyFirstRequired=*/true);
  return MinNumAGPR != 0u;
}