LLVM 22.0.0git
SILateBranchLowering.cpp
Go to the documentation of this file.
1//===-- SILateBranchLowering.cpp - Final preparation of branches ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This pass mainly lowers early terminate pseudo instructions.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "GCNSubtarget.h"
21
22using namespace llvm;
23
24#define DEBUG_TYPE "si-late-branch-lowering"
25
26namespace {
27
// Worker class shared by the legacy-PM and new-PM wrappers. Lowers late
// branch pseudos: SI_CS_CHAIN_TC_* chain tail calls, SI_EARLY_TERMINATE_SCC0,
// and SI_RETURN_TO_EPILOG placement (see run()).
28class SILateBranchLowering {
29private:
 // Cached target info, set up at the start of run().
30 const SIRegisterInfo *TRI = nullptr;
31 const SIInstrInfo *TII = nullptr;
 // Dominator tree, kept up to date as blocks are split / edges added.
32 MachineDominatorTree *MDT = nullptr;
33
 // Expand a chain tail-call pseudo into EXEC setup + SI_TCRETURN.
34 void expandChainCall(MachineInstr &MI, const GCNSubtarget &ST,
35 bool DynamicVGPR);
 // Insert a conditional branch from MI's block to EarlyExitBlock.
36 void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock);
37
38public:
39 SILateBranchLowering(MachineDominatorTree *MDT) : MDT(MDT) {}
40
 // Returns true if the function was modified.
41 bool run(MachineFunction &MF);
42
 // Wave-size dependent helpers: S_MOV_B32/EXEC_LO on wave32,
 // S_MOV_B64/EXEC on wave64 (initialized in run()).
43 unsigned MovOpc;
44 Register ExecReg;
45};
46
// Legacy pass-manager wrapper: fetches the dominator tree analysis and
// delegates all work to the SILateBranchLowering worker.
47class SILateBranchLoweringLegacy : public MachineFunctionPass {
48public:
49 static char ID;
50 SILateBranchLoweringLegacy() : MachineFunctionPass(ID) {}
51
52 bool runOnMachineFunction(MachineFunction &MF) override {
53 auto *MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
54 return SILateBranchLowering(MDT).run(MF);
55 }
56
57 StringRef getPassName() const override {
58 return "SI Final Branch Preparation";
59 }
60
 // NOTE: the body of getAnalysisUsage is elided in this rendering
 // (lines 62-64); presumably it requires/preserves the dominator tree
 // analysis — confirm against the full source.
61 void getAnalysisUsage(AnalysisUsage &AU) const override {
65 }
66};
67
68} // end anonymous namespace
69
70char SILateBranchLoweringLegacy::ID = 0;
71
// Legacy pass registration. NOTE: line 74 (an INITIALIZE_PASS_DEPENDENCY
// invocation, presumably on the dominator-tree wrapper pass) is elided in
// this rendering — confirm against the full source.
72INITIALIZE_PASS_BEGIN(SILateBranchLoweringLegacy, DEBUG_TYPE,
73 "SI insert s_cbranch_execz instructions", false, false)
75INITIALIZE_PASS_END(SILateBranchLoweringLegacy, DEBUG_TYPE,
76 "SI insert s_cbranch_execz instructions", false, false)
77
// Exported handle other code uses to refer to this pass by ID.
78char &llvm::SILateBranchLoweringPassID = SILateBranchLoweringLegacy::ID;
79
// generateEndPgm: emit the program-ending sequence at iterator I — a "null
// export" first when pixel-shader hardware expects an export, then S_ENDPGM.
// NOTE: the start of the signature (lines 80, 82) is elided in this
// rendering; the full parameter list also carries the SIInstrInfo and
// MachineFunction used below.
81 MachineBasicBlock::iterator I, DebugLoc DL,
83 const Function &F = MF.getFunction();
84 bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS;
85
86 // Check if hardware has been configured to expect color or depth exports.
87 bool HasColorExports = AMDGPU::getHasColorExport(F);
88 bool HasDepthExports = AMDGPU::getHasDepthExport(F);
89 bool HasExports = HasColorExports || HasDepthExports;
90
91 // Prior to GFX10, hardware always expects at least one export for PS.
92 bool MustExport = !AMDGPU::isGFX10Plus(TII->getSubtarget());
93
94 if (IsPS && (HasExports || MustExport)) {
95 // Generate "null export" if hardware is expecting PS to export.
 // Target selection: use the dedicated null target when available;
 // otherwise fall back to MRT0 (color) or MRTZ (depth). NOTE: line 99
 // (the true-branch of this conditional, presumably AMDGPU::Exp::ET_NULL)
 // is elided in this rendering — confirm against the full source.
97 int Target =
98 ST.hasNullExportTarget()
100 : (HasColorExports ? AMDGPU::Exp::ET_MRT0 : AMDGPU::Exp::ET_MRTZ);
 // The export's four VGPR sources are don't-care values (Undef VGPR0).
101 BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
102 .addImm(Target)
103 .addReg(AMDGPU::VGPR0, RegState::Undef)
104 .addReg(AMDGPU::VGPR0, RegState::Undef)
105 .addReg(AMDGPU::VGPR0, RegState::Undef)
106 .addReg(AMDGPU::VGPR0, RegState::Undef)
107 .addImm(1) // vm
108 .addImm(0) // compr
109 .addImm(0); // en
110 }
111
112 // s_endpgm
113 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
114}
115
// splitBlock: split MBB after MI and patch the dominator tree to match.
// NOTE: the signature head (lines 116-117) is elided in this rendering.
118 MachineBasicBlock *SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/ true);
119
120 // Update dominator tree
121 using DomTreeT = DomTreeBase<MachineBasicBlock>;
 // All of MBB's old outgoing edges now originate from SplitBB, and MBB
 // gains a single fallthrough edge to SplitBB. NOTE: line 122 (the
 // declaration of the DTUpdates vector) is elided in this rendering.
123 for (MachineBasicBlock *Succ : SplitBB->successors()) {
124 DTUpdates.push_back({DomTreeT::Insert, SplitBB, Succ});
125 DTUpdates.push_back({DomTreeT::Delete, &MBB, Succ});
126 }
127 DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB});
 // Apply as one batch so the tree is updated consistently.
128 MDT->applyUpdates(DTUpdates);
129}
130
// copyOpWithoutRegFlags: append operand Op to MIB, but for register operands
// copy only the register itself so flags (e.g. kill) are not propagated.
// NOTE: the signature head (lines 131-132) is elided in this rendering.
133 if (Op.isReg())
134 MIB.addReg(Op.getReg());
135 else
136 MIB.add(Op);
137}
138
139void SILateBranchLowering::expandChainCall(MachineInstr &MI,
140 const GCNSubtarget &ST,
141 bool DynamicVGPR) {
142 // This is a tail call that needs to be expanded into at least
143 // 2 instructions, one for setting EXEC and one for the actual tail call.
144 int ExecIdx =
145 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::exec);
146 assert(ExecIdx != -1 && "Missing EXEC operand");
147 const DebugLoc &DL = MI.getDebugLoc();
148 if (DynamicVGPR) {
149 // We have 3 extra operands and we need to:
150 // * Try to change the VGPR allocation
151 // * Select the callee based on the result of the reallocation attempt
152 // * Select the EXEC mask based on the result of the reallocation attempt
153 // If any of the register operands of the chain pseudo is used in more than
154 // one of these instructions, we need to make sure that the kill flags
155 // aren't copied along.
156 auto AllocMI =
157 BuildMI(*MI.getParent(), MI, DL, TII->get(AMDGPU::S_ALLOC_VGPR));
158 copyOpWithoutRegFlags(AllocMI,
159 *TII->getNamedOperand(MI, AMDGPU::OpName::numvgprs));
160
161 auto SelectCallee =
162 BuildMI(*MI.getParent(), MI, DL, TII->get(AMDGPU::S_CSELECT_B64))
163 .addDef(TII->getNamedOperand(MI, AMDGPU::OpName::src0)->getReg());
164 copyOpWithoutRegFlags(SelectCallee,
165 *TII->getNamedOperand(MI, AMDGPU::OpName::src0));
166 copyOpWithoutRegFlags(SelectCallee,
167 *TII->getNamedOperand(MI, AMDGPU::OpName::fbcallee));
168
169 auto SelectExec = BuildMI(*MI.getParent(), MI, DL,
170 TII->get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
171 : AMDGPU::S_CSELECT_B64))
172 .addDef(ExecReg);
173
174 copyOpWithoutRegFlags(SelectExec,
175 *TII->getNamedOperand(MI, AMDGPU::OpName::exec));
176 copyOpWithoutRegFlags(SelectExec,
177 *TII->getNamedOperand(MI, AMDGPU::OpName::fbexec));
178 } else {
179 auto SetExec = BuildMI(*MI.getParent(), MI, DL, TII->get(MovOpc), ExecReg);
180 copyOpWithoutRegFlags(SetExec,
181 *TII->getNamedOperand(MI, AMDGPU::OpName::exec));
182 }
183
184 for (int OpIdx = MI.getNumExplicitOperands() - 1; OpIdx >= ExecIdx; --OpIdx)
185 MI.removeOperand(OpIdx);
186
187 MI.setDesc(TII->get(AMDGPU::SI_TCRETURN));
188}
189
190void SILateBranchLowering::earlyTerm(MachineInstr &MI,
191 MachineBasicBlock *EarlyExitBlock) {
192 MachineBasicBlock &MBB = *MI.getParent();
193 const DebugLoc DL = MI.getDebugLoc();
194
195 auto BranchMI = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC0))
196 .addMBB(EarlyExitBlock);
197 auto Next = std::next(MI.getIterator());
198
199 if (Next != MBB.end() && !Next->isTerminator())
200 splitBlock(MBB, *BranchMI, MDT);
201
202 MBB.addSuccessor(EarlyExitBlock);
203 MDT->insertEdge(&MBB, EarlyExitBlock);
204}
205
// New pass-manager entry point. NOTE: the signature head (lines 206-208)
// and the preserved-analyses tail (lines 213-214) are elided in this
// rendering; presumably the tail builds the standard machine-function
// preserved set and preserves the dominator tree — confirm against the
// full source.
209 auto *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
 // If nothing changed, all analyses remain valid.
210 if (!SILateBranchLowering(MDT).run(MF))
211 return PreservedAnalyses::all();
212
215}
216
// Main driver: scan every instruction, erase redundant S_BRANCHes, expand
// chain tail calls in place, and collect early-terminate / return-to-epilog
// pseudos for lowering after the scan. Returns true if the function changed.
217bool SILateBranchLowering::run(MachineFunction &MF) {
218 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
219 TII = ST.getInstrInfo();
220 TRI = &TII->getRegisterInfo();
221
 // Wave-size dependent EXEC handling (32- vs 64-bit mask).
222 MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
223 ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
224
 // NOTE: line 226 (the declaration of EpilogInstrs, used below) is elided
 // in this rendering — confirm against the full source.
225 SmallVector<MachineInstr *, 4> EarlyTermInstrs;
227 bool MadeChange = false;
228
 // NOTE: line 230 (the per-instruction loop header; it must allow erasure
 // during iteration, presumably via make_early_inc_range) is elided in
 // this rendering — confirm against the full source.
229 for (MachineBasicBlock &MBB : MF) {
231 switch (MI.getOpcode()) {
232 case AMDGPU::S_BRANCH:
233 // Optimize out branches to the next block.
234 // This only occurs in -O0 when BranchFolding is not executed.
235 if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) {
236 assert(&MI == &MBB.back());
237 MI.eraseFromParent();
238 MadeChange = true;
239 }
240 break;
241
 // Chain tail calls are expanded in place (see expandChainCall).
242 case AMDGPU::SI_CS_CHAIN_TC_W32:
243 case AMDGPU::SI_CS_CHAIN_TC_W64:
244 expandChainCall(MI, ST, /*DynamicVGPR=*/false);
245 MadeChange = true;
246 break;
247 case AMDGPU::SI_CS_CHAIN_TC_W32_DVGPR:
248 case AMDGPU::SI_CS_CHAIN_TC_W64_DVGPR:
249 expandChainCall(MI, ST, /*DynamicVGPR=*/true);
250 MadeChange = true;
251 break;
252
 // Deferred: lowered together after the scan so they can share one
 // early-exit block.
253 case AMDGPU::SI_EARLY_TERMINATE_SCC0:
254 EarlyTermInstrs.push_back(&MI);
255 break;
256
257 case AMDGPU::SI_RETURN_TO_EPILOG:
258 EpilogInstrs.push_back(&MI);
259 break;
260
261 default:
262 break;
263 }
264 }
265 }
266
267 // Lower any early exit branches first
268 if (!EarlyTermInstrs.empty()) {
 // One shared exit block: clear EXEC, then end the program.
269 MachineBasicBlock *EarlyExitBlock = MF.CreateMachineBasicBlock();
270 DebugLoc DL;
271
272 MF.insert(MF.end(), EarlyExitBlock);
273 BuildMI(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII->get(MovOpc),
274 ExecReg)
275 .addImm(0);
276 generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII, MF);
277
278 for (MachineInstr *Instr : EarlyTermInstrs) {
279 // Early termination in GS does nothing
280 if (MF.getFunction().getCallingConv() != CallingConv::AMDGPU_GS)
281 earlyTerm(*Instr, EarlyExitBlock);
 // The pseudo itself is always removed, even in the GS no-op case.
282 Instr->eraseFromParent();
283 }
284
285 EarlyTermInstrs.clear();
286 MadeChange = true;
287 }
288
289 // Now check return to epilog instructions occur at function end
290 if (!EpilogInstrs.empty()) {
291 MachineBasicBlock *EmptyMBBAtEnd = nullptr;
292 assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
293
294 // If there are multiple returns to epilog then all will
295 // become jumps to new empty end block.
296 if (EpilogInstrs.size() > 1) {
297 EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
298 MF.insert(MF.end(), EmptyMBBAtEnd);
299 }
300
301 for (auto *MI : EpilogInstrs) {
302 auto *MBB = MI->getParent();
 // Already the final instruction of the final block — nothing to do.
303 if (MBB == &MF.back() && MI == &MBB->back())
304 continue;
305
306 // SI_RETURN_TO_EPILOG is not the last instruction.
307 // Jump to empty block at function end.
308 if (!EmptyMBBAtEnd) {
309 EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
310 MF.insert(MF.end(), EmptyMBBAtEnd);
311 }
312
 // Add the CFG edge, keep the dominator tree in sync, and replace the
 // pseudo with an unconditional branch to the trailing empty block.
313 MBB->addSuccessor(EmptyMBBAtEnd);
314 MDT->insertEdge(MBB, EmptyMBBAtEnd);
315 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
316 .addMBB(EmptyMBBAtEnd);
317 MI->eraseFromParent();
318 MadeChange = true;
319 }
320
321 EpilogInstrs.clear();
322 }
323
324 return MadeChange;
325}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
Provides AMDGPU specific target descriptions.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Register const TargetRegisterInfo * TRI
MachineInstr unsigned OpIdx
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:39
static void generateEndPgm(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, const SIInstrInfo *TII, MachineFunction &MF)
static void copyOpWithoutRegFlags(MachineInstrBuilder &MIB, MachineOperand &Op)
static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI, MachineDominatorTree *MDT)
#define DEBUG_TYPE
SI insert s_cbranch_execz instructions
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:124
Core dominator tree base class.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
LLVM_ABI MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:72
MachineOperand class - Representation of each machine instruction operand.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:85
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition: Analysis.h:132
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
Target - Wrapper for Target specific information.
bool getHasColorExport(const Function &F)
bool getHasDepthExport(const Function &F)
bool isGFX10Plus(const MCSubtargetInfo &STI)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:194
@ Undef
Value of the register doesn't matter.
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
char & SILateBranchLoweringPassID