30#define DEBUG_TYPE "amdgpu-nsa-reassign"
33 "Number of NSA instructions with non-sequential address found");
35 "Number of NSA instructions changed to sequential");
38class GCNNSAReassignImpl {
41 : VRM(VM), LRM(LM), LIS(LS) {}
46 using NSA_Status =
enum {
73 unsigned StartReg)
const;
75 bool canAssign(
unsigned StartReg,
unsigned NumRegs)
const;
111char GCNNSAReassignLegacy::
ID = 0;
115bool GCNNSAReassignImpl::tryAssignRegisters(
117 unsigned NumRegs = Intervals.size();
119 for (
unsigned N = 0;
N < NumRegs; ++
N)
120 if (VRM->hasPhys(Intervals[
N]->reg()))
121 LRM->unassign(*Intervals[
N]);
123 for (
unsigned N = 0;
N < NumRegs; ++
N)
127 for (
unsigned N = 0;
N < NumRegs; ++
N)
133bool GCNNSAReassignImpl::canAssign(
unsigned StartReg,
unsigned NumRegs)
const {
134 for (
unsigned N = 0;
N < NumRegs; ++
N) {
135 unsigned Reg = StartReg +
N;
136 if (!
MRI->isAllocatable(Reg))
139 for (
unsigned I = 0; CSRegs[
I]; ++
I)
140 if (
TRI->isSubRegisterEq(Reg, CSRegs[
I]) &&
141 !LRM->isPhysRegUsed(CSRegs[
I]))
148bool GCNNSAReassignImpl::scavengeRegs(
150 unsigned NumRegs = Intervals.
size();
152 if (NumRegs > MaxNumVGPRs)
154 unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;
156 for (
unsigned Reg = AMDGPU::VGPR0;
Reg <= MaxReg; ++
Reg) {
157 if (!canAssign(Reg, NumRegs))
160 if (tryAssignRegisters(Intervals, Reg))
167GCNNSAReassignImpl::NSA_Status
171 return NSA_Status::NOT_NSA;
173 switch (
Info->MIMGEncoding) {
174 case AMDGPU::MIMGEncGfx10NSA:
175 case AMDGPU::MIMGEncGfx11NSA:
178 return NSA_Status::NOT_NSA;
182 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vaddr0);
184 unsigned VgprBase = 0;
186 for (
unsigned I = 0;
I <
Info->VAddrOperands; ++
I) {
189 if (
Reg.isPhysical() || !VRM->isAssignedReg(Reg))
190 return NSA_Status::FIXED;
192 Register PhysReg = VRM->getPhys(Reg);
196 return NSA_Status::FIXED;
207 if (
TRI->getRegSizeInBits(*
MRI->getRegClass(Reg)) != 32 ||
Op.getSubReg())
208 return NSA_Status::FIXED;
215 if (VRM->getPreSplitReg(Reg))
216 return NSA_Status::FIXED;
220 if (Def &&
Def->isCopy() &&
Def->getOperand(1).getReg() == PhysReg)
221 return NSA_Status::FIXED;
223 for (
auto U :
MRI->use_nodbg_operands(Reg)) {
225 return NSA_Status::FIXED;
228 return NSA_Status::FIXED;
231 if (!LIS->hasInterval(Reg))
232 return NSA_Status::FIXED;
237 else if (VgprBase +
I != PhysReg)
241 return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
246 if (!
ST->hasNSAEncoding() || !
ST->hasNonNSAEncoding())
250 TRI =
ST->getRegisterInfo();
253 MaxNumVGPRs =
ST->getMaxNumVGPRs(MF);
254 MaxNumVGPRs = std::min(
257 CSRegs =
MRI->getCalleeSavedRegs();
259 using Candidate = std::pair<const MachineInstr*, bool>;
263 switch (CheckNSA(
MI)) {
266 case NSA_Status::CONTIGUOUS:
269 case NSA_Status::NON_CONTIGUOUS:
271 ++NumNSAInstructions;
277 bool Changed =
false;
278 for (
auto &
C : Candidates) {
283 if (CheckNSA(*
MI,
true) == NSA_Status::CONTIGUOUS) {
292 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::vaddr0);
297 for (
unsigned I = 0;
I <
Info->VAddrOperands; ++
I) {
312 MinInd = MaxInd = LIS->getInstructionIndex(*
MI);
315 MinInd =
I != 0 ? std::min(MinInd, LI->
beginIndex()) : LI->beginIndex();
316 MaxInd =
I != 0 ? std::max(MaxInd, LI->
endIndex()) : LI->endIndex();
319 if (Intervals.
empty())
323 <<
"\tOriginal allocation:\t";
329 bool Success = scavengeRegs(Intervals);
332 if (VRM->hasPhys(Intervals.back()->reg()))
337 std::lower_bound(Candidates.begin(), &
C, MinInd,
339 return LIS->getInstructionIndex(*C.first) < I;
341 for (
auto *E = Candidates.end();
342 Success &&
I != E && LIS->getInstructionIndex(*
I->first) < MaxInd;
344 if (
I->second && CheckNSA(*
I->first,
true) < NSA_Status::CONTIGUOUS) {
352 for (
unsigned I = 0;
I <
Info->VAddrOperands; ++
I)
353 if (VRM->hasPhys(Intervals[
I]->reg()))
354 LRM->unassign(*Intervals[
I]);
356 for (
unsigned I = 0;
I <
Info->VAddrOperands; ++
I)
357 LRM->assign(*Intervals[
I], OrigRegs[
I]);
365 dbgs() <<
"\tNew allocation:\t\t ["
377 auto *VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
378 auto *LRM = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
379 auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
381 GCNNSAReassignImpl Impl(VRM, LRM, LIS);
392 GCNNSAReassignImpl Impl(&VRM, &LRM, &LIS);
unsigned const MachineRegisterInfo * MRI
Analysis containing CSE Info
AMD GCN specific subclass of TargetSubtarget.
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Interface definition for SIRegisterInfo.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
This class represents an Operation in the Expression.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
LiveInterval - This class represents the liveness of a register, or stack slot.
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getOccupancy() const
unsigned getDynamicVGPRBlockSize() const
SlotIndex - An opaque wrapper around machine indexes.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
Reg
All possible values of the reg field in the ModR/M byte.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void initializeGCNNSAReassignLegacyPass(PassRegistry &)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.