LLVM 22.0.0git
GCNNSAReassign.cpp
Go to the documentation of this file.
1//===-- GCNNSAReassign.cpp - Reassign registers in NSA instructions -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// \brief Try to reassign registers on GFX10+ from non-sequential to sequential
11/// in NSA image instructions. Later SIShrinkInstructions pass will replace NSA
12/// with sequential versions where possible.
13///
14//===----------------------------------------------------------------------===//
15
16#include "GCNNSAReassign.h"
17#include "AMDGPU.h"
18#include "GCNSubtarget.h"
20#include "SIRegisterInfo.h"
21#include "llvm/ADT/Statistic.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "amdgpu-nsa-reassign"
31
32STATISTIC(NumNSAInstructions,
33 "Number of NSA instructions with non-sequential address found");
34STATISTIC(NumNSAConverted,
35 "Number of NSA instructions changed to sequential");
36
37namespace {
/// Pass-manager independent implementation of the NSA register reassignment.
/// Both the legacy pass (runOnMachineFunction) and the new pass manager entry
/// point in this file construct one of these and call run().
38class GCNNSAReassignImpl {
39public:
 /// Stores the three analysis pointers the implementation works on.
 /// \param VM virtual-to-physical register map.
 /// \param LM live register matrix used to assign/unassign intervals.
 /// \param LS live intervals of the function.
40 GCNNSAReassignImpl(VirtRegMap *VM, LiveRegMatrix *LM, LiveIntervals *LS)
41 : VRM(VM), LRM(LM), LIS(LS) {}
42
 /// Process \p MF; returns true if at least one NSA instruction had its
 /// address registers reassigned.
43 bool run(MachineFunction &MF);
44
45private:
 /// Classification of an instruction as returned by CheckNSA().
 /// The enumerator order is significant: run() compares a status with
 /// `< NSA_Status::CONTIGUOUS` to detect a candidate that is no longer
 /// contiguous, so do not reorder the values.
46 using NSA_Status = enum {
47 NOT_NSA, // Not an NSA instruction
48 FIXED, // NSA which we cannot modify
49 NON_CONTIGUOUS, // NSA with non-sequential address which we can try
50 // to optimize.
51 CONTIGUOUS // NSA with all sequential address registers
52 };
53
 /// Subtarget; run() queries it for NSA encoding support and VGPR limits.
54 const GCNSubtarget *ST;
55
 // NOTE(review): the member functions use a pointer named MRI, but its
 // declaration is not visible in this listing (the embedded line numbers skip
 // 56) — presumably dropped by the extraction; confirm against the real file.
57
 /// Register info, taken from ST->getRegisterInfo() in run().
58 const SIRegisterInfo *TRI;
59
60 VirtRegMap *VRM;
61
62 LiveRegMatrix *LRM;
63
64 LiveIntervals *LIS;
65
 /// Upper bound on the number of VGPRs to scan in scavengeRegs(); run() sets
 /// it to the minimum of two ST->getMaxNumVGPRs() queries.
66 unsigned MaxNumVGPRs;
67
 /// Callee saved registers from MRI->getCalleeSavedRegs(); canAssign() walks
 /// the array until it reaches a zero entry.
68 const MCPhysReg *CSRegs;
69
 /// Classify \p MI. With \p Fast set, the checks that decide whether the
 /// address registers may be reassigned are skipped and only the current
 /// physical assignment is compared.
70 NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;
71
 /// Unassign \p Intervals and try to assign Intervals[N] to StartReg + N.
 /// Returns false (leaving the intervals unassigned) on interference.
72 bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
73 unsigned StartReg) const;
74
 /// Returns true if every register in [StartReg, StartReg + NumRegs) is
 /// allocatable and passes the callee saved register check.
75 bool canAssign(unsigned StartReg, unsigned NumRegs) const;
76
 /// Scan VGPRs from VGPR0 upwards for a range that accepts \p Intervals.
77 bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;
78};
79
/// Legacy pass manager wrapper around GCNNSAReassignImpl.
// NOTE(review): the embedded line numbers skip 85, 93-95 and 97, so parts of
// the constructor and of getAnalysisUsage() are presumably missing from this
// listing — confirm against the real file before editing the code.
80class GCNNSAReassignLegacy : public MachineFunctionPass {
81public:
 /// Pass identification; defined below the class and exported through
 /// llvm::GCNNSAReassignID.
82 static char ID;
83
84 GCNNSAReassignLegacy() : MachineFunctionPass(ID) {
86 }
87
 /// Fetches the analyses and forwards to GCNNSAReassignImpl::run().
88 bool runOnMachineFunction(MachineFunction &MF) override;
89
 /// Human readable pass name.
90 StringRef getPassName() const override { return "GCN NSA Reassign"; };
91
 /// Declares that this pass preserves all analyses.
92 void getAnalysisUsage(AnalysisUsage &AU) const override {
96 AU.setPreservesAll();
98 }
99};
100
101} // End anonymous namespace.
102
// Register the legacy pass under the DEBUG_TYPE argument string with the
// display name "GCN NSA Reassign"; the trailing two flags are the macro's
// `cfg` and `analysis` parameters, both false here.
// NOTE(review): the embedded line numbers skip 105-107 between BEGIN and END;
// whatever was declared there is not visible in this listing.
103INITIALIZE_PASS_BEGIN(GCNNSAReassignLegacy, DEBUG_TYPE, "GCN NSA Reassign",
104 false, false)
108INITIALIZE_PASS_END(GCNNSAReassignLegacy, DEBUG_TYPE, "GCN NSA Reassign", false,
109 false)
110
// Storage for the pass ID declared in the class.
111char GCNNSAReassignLegacy::ID = 0;
112
// Public handle that refers to the same ID object.
113char &llvm::GCNNSAReassignID = GCNNSAReassignLegacy::ID;
114
115bool GCNNSAReassignImpl::tryAssignRegisters(
116 SmallVectorImpl<LiveInterval *> &Intervals, unsigned StartReg) const {
117 unsigned NumRegs = Intervals.size();
118
119 for (unsigned N = 0; N < NumRegs; ++N)
120 if (VRM->hasPhys(Intervals[N]->reg()))
121 LRM->unassign(*Intervals[N]);
122
123 for (unsigned N = 0; N < NumRegs; ++N)
124 if (LRM->checkInterference(*Intervals[N], MCRegister::from(StartReg + N)))
125 return false;
126
127 for (unsigned N = 0; N < NumRegs; ++N)
128 LRM->assign(*Intervals[N], MCRegister::from(StartReg + N));
129
130 return true;
131}
132
133bool GCNNSAReassignImpl::canAssign(unsigned StartReg, unsigned NumRegs) const {
134 for (unsigned N = 0; N < NumRegs; ++N) {
135 unsigned Reg = StartReg + N;
136 if (!MRI->isAllocatable(Reg))
137 return false;
138
139 for (unsigned I = 0; CSRegs[I]; ++I)
140 if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
141 !LRM->isPhysRegUsed(CSRegs[I]))
142 return false;
143 }
144
145 return true;
146}
147
148bool GCNNSAReassignImpl::scavengeRegs(
149 SmallVectorImpl<LiveInterval *> &Intervals) const {
150 unsigned NumRegs = Intervals.size();
151
152 if (NumRegs > MaxNumVGPRs)
153 return false;
154 unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;
155
156 for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) {
157 if (!canAssign(Reg, NumRegs))
158 continue;
159
160 if (tryAssignRegisters(Intervals, Reg))
161 return true;
162 }
163
164 return false;
165}
166
167GCNNSAReassignImpl::NSA_Status
168GCNNSAReassignImpl::CheckNSA(const MachineInstr &MI, bool Fast) const {
169 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
170 if (!Info)
171 return NSA_Status::NOT_NSA;
172
173 switch (Info->MIMGEncoding) {
174 case AMDGPU::MIMGEncGfx10NSA:
175 case AMDGPU::MIMGEncGfx11NSA:
176 break;
177 default:
178 return NSA_Status::NOT_NSA;
179 }
180
181 int VAddr0Idx =
182 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
183
184 unsigned VgprBase = 0;
185 bool NSA = false;
186 for (unsigned I = 0; I < Info->VAddrOperands; ++I) {
187 const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
188 Register Reg = Op.getReg();
189 if (Reg.isPhysical() || !VRM->isAssignedReg(Reg))
190 return NSA_Status::FIXED;
191
192 Register PhysReg = VRM->getPhys(Reg);
193
194 if (!Fast) {
195 if (!PhysReg)
196 return NSA_Status::FIXED;
197
198 // TODO: address the below limitation to handle GFX11 BVH instructions
199 // Bail if address is not a VGPR32. That should be possible to extend the
200 // optimization to work with subregs of a wider register tuples, but the
201 // logic to find free registers will be much more complicated with much
202 // less chances for success. That seems reasonable to assume that in most
203 // cases a tuple is used because a vector variable contains different
204 // parts of an address and it is either already consecutive or cannot
205 // be reassigned if not. If needed it is better to rely on register
206 // coalescer to process such address tuples.
207 if (TRI->getRegSizeInBits(*MRI->getRegClass(Reg)) != 32 || Op.getSubReg())
208 return NSA_Status::FIXED;
209
210 // InlineSpiller does not call LRM::assign() after an LI split leaving
211 // it in an inconsistent state, so we cannot call LRM::unassign().
212 // See llvm bug #48911.
213 // Skip reassign if a register has originated from such split.
214 // FIXME: Remove the workaround when bug #48911 is fixed.
215 if (VRM->getPreSplitReg(Reg))
216 return NSA_Status::FIXED;
217
218 const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
219
220 if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
221 return NSA_Status::FIXED;
222
223 for (auto U : MRI->use_nodbg_operands(Reg)) {
224 if (U.isImplicit())
225 return NSA_Status::FIXED;
226 const MachineInstr *UseInst = U.getParent();
227 if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg)
228 return NSA_Status::FIXED;
229 }
230
231 if (!LIS->hasInterval(Reg))
232 return NSA_Status::FIXED;
233 }
234
235 if (I == 0)
236 VgprBase = PhysReg;
237 else if (VgprBase + I != PhysReg)
238 NSA = true;
239 }
240
241 return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
242}
243
/// Try to make the address registers of NSA image instructions in \p MF
/// sequential.
///
/// Outline:
///  1. Bail out unless the subtarget has both the NSA and the non-NSA
///     encoding.
///  2. Collect every NSA instruction that CheckNSA() classifies as CONTIGUOUS
///     or NON_CONTIGUOUS; the bool of each candidate records "contiguous".
///  3. For each non-contiguous candidate, try to scavenge a consecutive VGPR
///     range for its address registers, then verify that no candidate that was
///     contiguous before got broken; on failure restore the original
///     assignment.
///
/// \returns true if at least one instruction was converted by a reassignment.
// NOTE(review): the embedded line numbers skip 245, 252, 260, 294-295 and 364;
// the definitions of ST, MFI, Candidates, Intervals and OrigRegs and the
// opening of the last LLVM_DEBUG are presumably on those lines and are not
// visible in this listing — confirm against the real file.
244bool GCNNSAReassignImpl::run(MachineFunction &MF) {
246 if (!ST->hasNSAEncoding() || !ST->hasNonNSAEncoding())
247 return false;
248
249 MRI = &MF.getRegInfo();
250 TRI = ST->getRegisterInfo();
251
 // The VGPR budget is the smaller of the per-function limit and the limit
 // derived from the occupancy / dynamic VGPR block size recorded in MFI.
253 MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
254 MaxNumVGPRs = std::min(
255 ST->getMaxNumVGPRs(MFI->getOccupancy(), MFI->getDynamicVGPRBlockSize()),
256 MaxNumVGPRs);
257 CSRegs = MRI->getCalleeSavedRegs();
258
 // Candidate = (instruction, "address registers are currently contiguous").
259 using Candidate = std::pair<const MachineInstr*, bool>;
 // Collect candidates in function order; only non-contiguous ones are counted
 // in the NumNSAInstructions statistic.
261 for (const MachineBasicBlock &MBB : MF) {
262 for (const MachineInstr &MI : MBB) {
263 switch (CheckNSA(MI)) {
264 default:
265 continue;
266 case NSA_Status::CONTIGUOUS:
267 Candidates.push_back(std::pair(&MI, true));
268 break;
269 case NSA_Status::NON_CONTIGUOUS:
270 Candidates.push_back(std::pair(&MI, false));
271 ++NumNSAInstructions;
272 break;
273 }
274 }
275 }
276
277 bool Changed = false;
278 for (auto &C : Candidates) {
279 if (C.second)
280 continue;
281
 // An earlier reassignment may have made this instruction contiguous as a
 // side effect; the fast check only compares the current physical registers.
282 const MachineInstr *MI = C.first;
283 if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {
284 // Already happen to be fixed.
285 C.second = true;
286 ++NumNSAConverted;
287 continue;
288 }
289
290 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode());
291 int VAddr0Idx =
292 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);
293
 // Gather the live interval and the original physical register of every
 // address operand, and track the slot index range [MinInd, MaxInd) covered
 // by those intervals.
296 SlotIndex MinInd, MaxInd;
297 for (unsigned I = 0; I < Info->VAddrOperands; ++I) {
298 const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
299 Register Reg = Op.getReg();
300 LiveInterval *LI = &LIS->getInterval(Reg);
301 if (llvm::is_contained(Intervals, LI)) {
302 // Same register used, unable to make sequential
303 Intervals.clear();
304 break;
305 }
306 Intervals.push_back(LI);
307 OrigRegs.push_back(VRM->getPhys(Reg));
308 if (LI->empty()) {
309 // The address input is undef, so it doesn't contribute to the relevant
310 // range. Seed a reasonable index range if required.
311 if (I == 0)
312 MinInd = MaxInd = LIS->getInstructionIndex(*MI);
313 continue;
314 }
315 MinInd = I != 0 ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
316 MaxInd = I != 0 ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
317 }
318
 // Empty means a register was used twice above; skip this candidate.
319 if (Intervals.empty())
320 continue;
321
322 LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI
323 << "\tOriginal allocation:\t";
324 for (auto *LI
325 : Intervals) dbgs()
326 << " " << llvm::printReg((VRM->getPhys(LI->reg())), TRI);
327 dbgs() << '\n');
328
329 bool Success = scavengeRegs(Intervals);
330 if (!Success) {
331 LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n");
 // tryAssignRegisters() unassigns every interval before it can fail, so a
 // still-assigned last interval means no attempt touched the allocation and
 // there is nothing to restore.
332 if (VRM->hasPhys(Intervals.back()->reg())) // Did not change allocation.
333 continue;
334 } else {
335 // Check we did not make it worse for other instructions.
 // Binary search for the first candidate at or after MinInd, then scan the
 // candidates up to MaxInd. NOTE(review): the search assumes Candidates is
 // ordered by instruction index — presumably true because it was filled in
 // function order; confirm.
336 auto *I =
337 std::lower_bound(Candidates.begin(), &C, MinInd,
338 [this](const Candidate &C, SlotIndex I) {
339 return LIS->getInstructionIndex(*C.first) < I;
340 });
341 for (auto *E = Candidates.end();
342 Success && I != E && LIS->getInstructionIndex(*I->first) < MaxInd;
343 ++I) {
 // A candidate marked contiguous that now classifies below CONTIGUOUS
 // was broken by the new assignment.
344 if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) {
345 Success = false;
346 LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);
347 }
348 }
349 }
350
 // Roll back: drop whatever is assigned now and reinstate the registers
 // recorded in OrigRegs.
351 if (!Success) {
352 for (unsigned I = 0; I < Info->VAddrOperands; ++I)
353 if (VRM->hasPhys(Intervals[I]->reg()))
354 LRM->unassign(*Intervals[I]);
355
356 for (unsigned I = 0; I < Info->VAddrOperands; ++I)
357 LRM->assign(*Intervals[I], OrigRegs[I]);
358
359 continue;
360 }
361
362 C.second = true;
363 ++NumNSAConverted;
365 dbgs() << "\tNew allocation:\t\t ["
366 << llvm::printReg((VRM->getPhys(Intervals.front()->reg())), TRI)
367 << " : "
368 << llvm::printReg((VRM->getPhys(Intervals.back()->reg())), TRI)
369 << "]\n");
370 Changed = true;
371 }
372
373 return Changed;
374}
375
376bool GCNNSAReassignLegacy::runOnMachineFunction(MachineFunction &MF) {
377 auto *VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
378 auto *LRM = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
379 auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
380
381 GCNNSAReassignImpl Impl(VRM, LRM, LIS);
382 return Impl.run(MF);
383}
384
// NOTE(review): the function header for this body is not visible in this
// listing (the embedded line numbers skip 385-387). From the use of MFAM and
// the PreservedAnalyses return value this is presumably the new pass manager
// run(MachineFunction &, MachineFunctionAnalysisManager &) entry point —
// confirm against the real file.
// Fetch the same three analyses the legacy wrapper uses, from the analysis
// manager.
388 auto &VRM = MFAM.getResult<VirtRegMapAnalysis>(MF);
389 auto &LRM = MFAM.getResult<LiveRegMatrixAnalysis>(MF);
390 auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
391
392 GCNNSAReassignImpl Impl(&VRM, &LRM, &LIS);
// The "changed" result of Impl.run() is not used here: all analyses are
// reported as preserved whether or not a reassignment happened.
393 Impl.run(MF);
394 return PreservedAnalyses::all();
395}
unsigned const MachineRegisterInfo * MRI
#define Success
MachineBasicBlock & MBB
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
GCN NSA Reassign
#define DEBUG_TYPE
AMD GCN specific subclass of TargetSubtarget.
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:39
Interface definition for SIRegisterInfo.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
This class represents an Operation in the Expression.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:690
Register reg() const
Definition: LiveInterval.h:721
bool empty() const
Definition: LiveInterval.h:384
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:387
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:394
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
Definition: MCRegister.h:69
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
Definition: MachineInstr.h:72
bool isCopy() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
MachineOperand class - Representation of each machine instruction operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:85
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:66
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Reg
All possible values of the reg field in the ModR/M byte.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
char & GCNNSAReassignID
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
void initializeGCNNSAReassignLegacyPass(PassRegistry &)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
#define N