38#define DEBUG_TYPE "amdgpu-preload-kern-arg-prolog"
50class AMDGPUPreloadKernArgProlog {
66 void createBackCompatBlock(
unsigned NumKernArgPreloadSGPRs);
71 unsigned NumKernArgPreloadSGPRs);
81 return "AMDGPU Preload Kernel Arguments Prolog";
89char AMDGPUPreloadKernArgPrologLegacy::ID = 0;
92 "AMDGPU Preload Kernel Arguments Prolog",
false,
false)
95 AMDGPUPreloadKernArgPrologLegacy::
ID;
98 return new AMDGPUPreloadKernArgPrologLegacy();
101bool AMDGPUPreloadKernArgPrologLegacy::runOnMachineFunction(
103 return AMDGPUPreloadKernArgProlog(MF).run();
106AMDGPUPreloadKernArgProlog::AMDGPUPreloadKernArgProlog(
MachineFunction &MF)
109 TRI(*
ST.getRegisterInfo()) {}
111bool AMDGPUPreloadKernArgProlog::run() {
112 if (!
ST.needsKernArgPreloadProlog())
115 unsigned NumKernArgPreloadSGPRs = MFI.getNumKernargPreloadedSGPRs();
116 if (!NumKernArgPreloadSGPRs)
119 createBackCompatBlock(NumKernArgPreloadSGPRs);
123void AMDGPUPreloadKernArgProlog::createBackCompatBlock(
124 unsigned NumKernArgPreloadSGPRs) {
125 auto KernelEntryMBB = MF.
begin();
127 MF.
insert(KernelEntryMBB, BackCompatMBB);
129 assert(MFI.getUserSGPRInfo().hasKernargSegmentPtr() &&
130 "Kernel argument segment pointer register not set.");
131 Register KernArgSegmentPtr = MFI.getArgInfo().KernargSegmentPtr.getRegister();
132 BackCompatMBB->
addLiveIn(KernArgSegmentPtr);
135 addBackCompatLoads(BackCompatMBB, KernArgSegmentPtr, NumKernArgPreloadSGPRs);
146 .
addMBB(&*KernelEntryMBB);
159 unsigned NumKernArgPreloadSGPRs) {
160 static constexpr LoadConfig Configs[] = {
161 {8, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM},
162 {4, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM},
163 {2, &AMDGPU::SReg_64RegClass, AMDGPU::S_LOAD_DWORDX2_IMM}};
165 for (
const auto &
Config : Configs) {
166 if (NumKernArgPreloadSGPRs >=
Config.Size) {
167 Register LoadReg =
TRI.getMatchingSuperReg(KernArgPreloadSGPR,
168 AMDGPU::sub0,
Config.RegClass);
178 return LoadConfig{1, &AMDGPU::SReg_32RegClass, AMDGPU::S_LOAD_DWORD_IMM,
182void AMDGPUPreloadKernArgProlog::addBackCompatLoads(
184 unsigned NumKernArgPreloadSGPRs) {
185 Register KernArgPreloadSGPR = MFI.getArgInfo().FirstKernArgPreloadReg;
189 while (NumKernArgPreloadSGPRs > 0) {
194 .
addReg(KernArgSegmentPtr)
199 KernArgPreloadSGPR = KernArgPreloadSGPR.
asMCReg() +
Config.Size;
200 NumKernArgPreloadSGPRs -=
Config.Size;
207 if (!AMDGPUPreloadKernArgProlog(MF).
run())
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static LoadConfig getLoadParameters(const TargetRegisterInfo &TRI, Register KernArgPreloadSGPR, unsigned NumKernArgPreloadSGPRs)
Find the largest possible load size that fits with SGPR alignment.
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static const uint32_t IV[8]
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &AM)
A container for analyses that lazily runs them and caches their results.
FunctionPass class - This class is used to implement most global optimizations.
void setAlignment(Align A)
Set alignment of the basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
self_iterator getIterator()
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
unsigned getVmcntBitMask(const IsaVersion &Version)
unsigned getExpcntBitMask(const IsaVersion &Version)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
char & AMDGPUPreloadKernArgPrologLegacyID
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
FunctionPass * createAMDGPUPreloadKernArgPrologLegacyPass()
Instruction set architecture version.
This struct is a compact representation of a valid (non-zero power of two) alignment.