LLVM 22.0.0git
SIOptimizeVGPRLiveRange.cpp
Go to the documentation of this file.
1//===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This pass tries to remove unnecessary VGPR live ranges in divergent if-else
11/// structures and waterfall loops.
12///
13/// When we do structurization, we usually transform an if-else into two
14/// successive if-then (with a flow block to do predicate inversion). Consider a
15/// simple case after structurization: A divergent value %a was defined before
16/// if-else and used in both THEN (use in THEN is optional) and ELSE part:
17/// bb.if:
18/// %a = ...
19/// ...
20/// bb.then:
21/// ... = op %a
22/// ... // %a can be dead here
23/// bb.flow:
24/// ...
25/// bb.else:
26/// ... = %a
27/// ...
28/// bb.endif
29///
30/// As register allocator has no idea of the thread-control-flow, it will just
31/// assume %a would be alive in the whole range of bb.then because of a later
32/// use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect
33/// to exec mask. For this if-else case, the lanes active in bb.then will be
34/// inactive in bb.else, and vice-versa. So we are safe to say that %a was dead
35/// after the last use in bb.then until the end of the block. The reason is
36/// the instructions in bb.then will only overwrite lanes that will never be
37/// accessed in bb.else.
38///
39/// This pass aims to tell register allocator that %a is in-fact dead,
40/// through inserting a phi-node in bb.flow saying that %a is undef when coming
41/// from bb.then, and then replace the uses in the bb.else with the result of
42/// newly inserted phi.
43///
44/// Two key conditions must be met to ensure correctness:
45/// 1.) The def-point should be in the same loop-level as if-else-endif to make
46/// sure the second loop iteration still get correct data.
47/// 2.) There should be no further uses after the IF-ELSE region.
48///
49///
50/// Waterfall loops get inserted around instructions that use divergent values
51/// but can only be executed with a uniform value. For example an indirect call
52/// to a divergent address:
53/// bb.start:
54/// %a = ...
55/// %fun = ...
56/// ...
57/// bb.loop:
58/// call %fun (%a)
59/// ... // %a can be dead here
60/// loop %bb.loop
61///
62/// The loop block is executed multiple times, but it is run exactly once for
63/// each active lane. Similar to the if-else case, the register allocator
64/// assumes that %a is live throughout the loop as it is used again in the next
65/// iteration. If %a is a VGPR that is unused after the loop, it does not need
66/// to be live after its last use in the loop block. By inserting a phi-node at
67/// the start of bb.loop that is undef when coming from bb.loop, the register
68/// allocation knows that the value of %a does not need to be preserved through
69/// iterations of the loop.
70///
71//
72//===----------------------------------------------------------------------===//
73
75#include "AMDGPU.h"
76#include "GCNSubtarget.h"
83#include "llvm/IR/Dominators.h"
84
85using namespace llvm;
86
87#define DEBUG_TYPE "si-opt-vgpr-liverange"
88
89namespace {
90
91class SIOptimizeVGPRLiveRange {
92private:
93 const SIRegisterInfo *TRI = nullptr;
94 const SIInstrInfo *TII = nullptr;
95 LiveVariables *LV = nullptr;
96 MachineDominatorTree *MDT = nullptr;
97 const MachineLoopInfo *Loops = nullptr;
98 MachineRegisterInfo *MRI = nullptr;
99
100public:
101 SIOptimizeVGPRLiveRange(LiveVariables *LV, MachineDominatorTree *MDT,
103 : LV(LV), MDT(MDT), Loops(Loops) {}
104 bool run(MachineFunction &MF);
105
106 MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
107
108 void collectElseRegionBlocks(MachineBasicBlock *Flow,
109 MachineBasicBlock *Endif,
111
112 void
113 collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
114 MachineBasicBlock *Endif,
116 SmallVectorImpl<Register> &CandidateRegs) const;
117
118 void collectWaterfallCandidateRegisters(
119 MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
120 SmallSetVector<Register, 16> &CandidateRegs,
122 SmallVectorImpl<MachineInstr *> &Instructions) const;
123
124 void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
126
127 void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
128 MachineBasicBlock *Flow) const;
129
130 void updateLiveRangeInElseRegion(
132 MachineBasicBlock *Endif,
134
135 void
136 optimizeLiveRange(Register Reg, MachineBasicBlock *If,
139
140 void optimizeWaterfallLiveRange(
141 Register Reg, MachineBasicBlock *LoopHeader,
143 SmallVectorImpl<MachineInstr *> &Instructions) const;
144};
145
146class SIOptimizeVGPRLiveRangeLegacy : public MachineFunctionPass {
147public:
148 static char ID;
149
150 SIOptimizeVGPRLiveRangeLegacy() : MachineFunctionPass(ID) {}
151
152 bool runOnMachineFunction(MachineFunction &MF) override;
153
154 StringRef getPassName() const override {
155 return "SI Optimize VGPR LiveRange";
156 }
157
158 void getAnalysisUsage(AnalysisUsage &AU) const override {
159 AU.setPreservesCFG();
167 }
168
170 return MachineFunctionProperties().setIsSSA();
171 }
172
174 return MachineFunctionProperties().setNoPHIs();
175 }
176};
177
178} // end anonymous namespace
179
180// Check whether the MBB is a else flow block and get the branching target which
181// is the Endif block
183SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
184 for (auto &BR : MBB->terminators()) {
185 if (BR.getOpcode() == AMDGPU::SI_ELSE)
186 return BR.getOperand(2).getMBB();
187 }
188 return nullptr;
189}
190
191void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
194 assert(Flow != Endif);
195
197 unsigned Cur = 0;
198 while (MBB) {
199 for (auto *Pred : MBB->predecessors()) {
200 if (Pred != Flow)
201 Blocks.insert(Pred);
202 }
203
204 if (Cur < Blocks.size())
205 MBB = Blocks[Cur++];
206 else
207 MBB = nullptr;
208 }
209
210 LLVM_DEBUG({
211 dbgs() << "Found Else blocks: ";
212 for (auto *MBB : Blocks)
213 dbgs() << printMBBReference(*MBB) << ' ';
214 dbgs() << '\n';
215 });
216}
217
218/// Find the instructions(excluding phi) in \p MBB that uses the \p Reg.
219void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
222 for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) {
223 if (UseMI.getParent() == MBB && !UseMI.isPHI())
224 Uses.push_back(&UseMI);
225 }
226}
227
228/// Collect the killed registers in the ELSE region which are not alive through
229/// the whole THEN region.
230void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
233 SmallVectorImpl<Register> &CandidateRegs) const {
234
235 SmallSet<Register, 8> KillsInElse;
236
237 for (auto *Else : ElseBlocks) {
238 for (auto &MI : Else->instrs()) {
239 if (MI.isDebugInstr())
240 continue;
241
242 for (auto &MO : MI.operands()) {
243 if (!MO.isReg() || !MO.getReg() || MO.isDef())
244 continue;
245
246 Register MOReg = MO.getReg();
247 // We can only optimize AGPR/VGPR virtual register
248 if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
249 continue;
250
251 if (MO.readsReg()) {
252 LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
253 const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
254 // Make sure two conditions are met:
255 // a.) the value is defined before/in the IF block
256 // b.) should be defined in the same loop-level.
257 if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
258 Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) {
259 // Check if the register is live into the endif block. If not,
260 // consider it killed in the else region.
261 LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
262 if (!VI.isLiveIn(*Endif, MOReg, *MRI)) {
263 KillsInElse.insert(MOReg);
264 } else {
265 LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
266 << " as Live in Endif\n");
267 }
268 }
269 }
270 }
271 }
272 }
273
274 // Check the phis in the Endif, looking for value coming from the ELSE
275 // region. Make sure the phi-use is the last use.
276 for (auto &MI : Endif->phis()) {
277 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
278 auto &MO = MI.getOperand(Idx);
279 auto *Pred = MI.getOperand(Idx + 1).getMBB();
280 if (Pred == Flow)
281 continue;
282 assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
283
284 if (!MO.isReg() || !MO.getReg() || MO.isUndef())
285 continue;
286
287 Register Reg = MO.getReg();
288 if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg))
289 continue;
290
291 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
292
293 if (VI.isLiveIn(*Endif, Reg, *MRI)) {
294 LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
295 << " as Live in Endif\n");
296 continue;
297 }
298 // Make sure two conditions are met:
299 // a.) the value is defined before/in the IF block
300 // b.) should be defined in the same loop-level.
301 const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
302 if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
303 Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
304 KillsInElse.insert(Reg);
305 }
306 }
307
308 auto IsLiveThroughThen = [&](Register Reg) {
309 for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
310 ++I) {
311 if (!I->readsReg())
312 continue;
313 auto *UseMI = I->getParent();
314 auto *UseMBB = UseMI->getParent();
315 if (UseMBB == Flow || UseMBB == Endif) {
316 if (!UseMI->isPHI())
317 return true;
318
319 auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
320 // The register is live through the path If->Flow or Flow->Endif.
321 // we should not optimize for such cases.
322 if ((UseMBB == Flow && IncomingMBB != If) ||
323 (UseMBB == Endif && IncomingMBB == Flow))
324 return true;
325 }
326 }
327 return false;
328 };
329
330 for (auto Reg : KillsInElse) {
331 if (!IsLiveThroughThen(Reg))
332 CandidateRegs.push_back(Reg);
333 }
334}
335
336/// Collect the registers used in the waterfall loop block that are defined
337/// before.
338void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
339 MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
340 SmallSetVector<Register, 16> &CandidateRegs,
342 SmallVectorImpl<MachineInstr *> &Instructions) const {
343
344 // Collect loop instructions, potentially spanning multiple blocks
345 auto *MBB = LoopHeader;
346 for (;;) {
347 Blocks.insert(MBB);
348 for (auto &MI : *MBB) {
349 if (MI.isDebugInstr())
350 continue;
351 Instructions.push_back(&MI);
352 }
353 if (MBB == LoopEnd)
354 break;
355
356 if ((MBB != LoopHeader && MBB->pred_size() != 1) ||
357 (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) {
358 LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n");
359 return;
360 }
361
362 MBB = *MBB->succ_begin();
363 }
364
365 for (auto *I : Instructions) {
366 auto &MI = *I;
367
368 for (auto &MO : MI.all_uses()) {
369 if (!MO.getReg())
370 continue;
371
372 Register MOReg = MO.getReg();
373 // We can only optimize AGPR/VGPR virtual register
374 if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
375 continue;
376
377 if (MO.readsReg()) {
378 MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
379 // Make sure the value is defined before the LOOP block
380 if (!Blocks.contains(DefMBB) && !CandidateRegs.contains(MOReg)) {
381 // If the variable is used after the loop, the register coalescer will
382 // merge the newly created register and remove the phi node again.
383 // Just do nothing in that case.
384 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg);
385 bool IsUsed = false;
386 for (auto *Succ : LoopEnd->successors()) {
387 if (!Blocks.contains(Succ) &&
388 OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) {
389 IsUsed = true;
390 break;
391 }
392 }
393 if (!IsUsed) {
394 LLVM_DEBUG(dbgs() << "Found candidate reg: "
395 << printReg(MOReg, TRI, 0, MRI) << '\n');
396 CandidateRegs.insert(MOReg);
397 } else {
398 LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: "
399 << printReg(MOReg, TRI, 0, MRI) << '\n');
400 }
401 }
402 }
403 }
404 }
405}
406
407// Re-calculate the liveness of \p Reg in the THEN-region
408void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
412
413 // Collect all successors until we see the flow block, where we should
414 // reconverge.
415 while (!WorkList.empty()) {
416 auto *MBB = WorkList.pop_back_val();
417 for (auto *Succ : MBB->successors()) {
418 if (Succ != Flow && Blocks.insert(Succ))
419 WorkList.push_back(Succ);
420 }
421 }
422
423 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
424 for (MachineBasicBlock *MBB : Blocks) {
425 // Clear Live bit, as we will recalculate afterwards
426 LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
427 << '\n');
428 OldVarInfo.AliveBlocks.reset(MBB->getNumber());
429 }
430
432
433 // Get the blocks the Reg should be alive through
434 for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
435 ++I) {
436 auto *UseMI = I->getParent();
437 if (UseMI->isPHI() && I->readsReg()) {
438 if (Blocks.contains(UseMI->getParent()))
439 PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
440 }
441 }
442
443 for (MachineBasicBlock *MBB : Blocks) {
445 // PHI instructions has been processed before.
446 findNonPHIUsesInBlock(Reg, MBB, Uses);
447
448 if (Uses.size() == 1) {
449 LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
450 << printMBBReference(*MBB) << '\n');
451 LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin()));
452 } else if (Uses.size() > 1) {
453 // Process the instructions in-order
454 LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
455 << printMBBReference(*MBB) << '\n');
456 for (MachineInstr &MI : *MBB) {
458 LV->HandleVirtRegUse(Reg, MBB, MI);
459 }
460 }
461
462 // Mark Reg alive through the block if this is a PHI incoming block
463 if (PHIIncoming.contains(MBB))
464 LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
465 MBB);
466 }
467
468 // Set the isKilled flag if we get new Kills in the THEN region.
469 for (auto *MI : OldVarInfo.Kills) {
470 if (Blocks.contains(MI->getParent()))
471 MI->addRegisterKilled(Reg, TRI);
472 }
473}
474
475void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
477 MachineBasicBlock *Endif,
478 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
479 LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
480 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
481
482 // Transfer aliveBlocks from Reg to NewReg
483 for (auto *MBB : ElseBlocks) {
484 unsigned BBNum = MBB->getNumber();
485 if (OldVarInfo.AliveBlocks.test(BBNum)) {
486 NewVarInfo.AliveBlocks.set(BBNum);
487 LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB)
488 << '\n');
489 OldVarInfo.AliveBlocks.reset(BBNum);
490 }
491 }
492
493 // Transfer the possible Kills in ElseBlocks from Reg to NewReg
494 auto I = OldVarInfo.Kills.begin();
495 while (I != OldVarInfo.Kills.end()) {
496 if (ElseBlocks.contains((*I)->getParent())) {
497 NewVarInfo.Kills.push_back(*I);
498 I = OldVarInfo.Kills.erase(I);
499 } else {
500 ++I;
501 }
502 }
503}
504
505void SIOptimizeVGPRLiveRange::optimizeLiveRange(
507 MachineBasicBlock *Endif,
508 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
509 // Insert a new PHI, marking the value from the THEN region being
510 // undef.
511 LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
512 const auto *RC = MRI->getRegClass(Reg);
513 Register NewReg = MRI->createVirtualRegister(RC);
514 Register UndefReg = MRI->createVirtualRegister(RC);
515 MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
516 TII->get(TargetOpcode::PHI), NewReg);
517 for (auto *Pred : Flow->predecessors()) {
518 if (Pred == If)
519 PHI.addReg(Reg).addMBB(Pred);
520 else
521 PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
522 }
523
524 // Replace all uses in the ELSE region or the PHIs in ENDIF block
525 // Use early increment range because setReg() will update the linked list.
526 for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
527 auto *UseMI = O.getParent();
528 auto *UseBlock = UseMI->getParent();
529 // Replace uses in Endif block
530 if (UseBlock == Endif) {
531 if (UseMI->isPHI())
532 O.setReg(NewReg);
533 else if (UseMI->isDebugInstr())
534 continue;
535 else {
536 // DetectDeadLanes may mark register uses as undef without removing
537 // them, in which case a non-phi instruction using the original register
538 // may exist in the Endif block even though the register is not live
539 // into it.
540 assert(!O.readsReg());
541 }
542 continue;
543 }
544
545 // Replace uses in Else region
546 if (ElseBlocks.contains(UseBlock))
547 O.setReg(NewReg);
548 }
549
550 // The optimized Reg is not alive through Flow blocks anymore.
551 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
552 OldVarInfo.AliveBlocks.reset(Flow->getNumber());
553
554 updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
555 updateLiveRangeInThenRegion(Reg, If, Flow);
556}
557
558void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
559 Register Reg, MachineBasicBlock *LoopHeader,
561 SmallVectorImpl<MachineInstr *> &Instructions) const {
562 // Insert a new PHI, marking the value from the last loop iteration undef.
563 LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
564 const auto *RC = MRI->getRegClass(Reg);
565 Register NewReg = MRI->createVirtualRegister(RC);
566 Register UndefReg = MRI->createVirtualRegister(RC);
567
568 // Replace all uses in the LOOP region
569 // Use early increment range because setReg() will update the linked list.
570 for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
571 auto *UseMI = O.getParent();
572 auto *UseBlock = UseMI->getParent();
573 // Replace uses in Loop blocks
574 if (Blocks.contains(UseBlock))
575 O.setReg(NewReg);
576 }
577
579 BuildMI(*LoopHeader, LoopHeader->getFirstNonPHI(), DebugLoc(),
580 TII->get(TargetOpcode::PHI), NewReg);
581 for (auto *Pred : LoopHeader->predecessors()) {
582 if (Blocks.contains(Pred))
583 PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
584 else
585 PHI.addReg(Reg).addMBB(Pred);
586 }
587
588 LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
589 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
590
591 // Find last use and mark as kill
592 MachineInstr *Kill = nullptr;
593 for (auto *MI : reverse(Instructions)) {
594 if (MI->readsRegister(NewReg, TRI)) {
595 MI->addRegisterKilled(NewReg, TRI);
596 NewVarInfo.Kills.push_back(MI);
597 Kill = MI;
598 break;
599 }
600 }
601 assert(Kill && "Failed to find last usage of register in loop");
602
603 MachineBasicBlock *KillBlock = Kill->getParent();
604 bool PostKillBlock = false;
605 for (auto *Block : Blocks) {
606 auto BBNum = Block->getNumber();
607
608 // collectWaterfallCandidateRegisters only collects registers that are dead
609 // after the loop. So we know that the old reg is no longer live throughout
610 // the waterfall loop.
611 OldVarInfo.AliveBlocks.reset(BBNum);
612
613 // The new register is live up to (and including) the block that kills it.
614 PostKillBlock |= (Block == KillBlock);
615 if (PostKillBlock) {
616 NewVarInfo.AliveBlocks.reset(BBNum);
617 } else if (Block != LoopHeader) {
618 NewVarInfo.AliveBlocks.set(BBNum);
619 }
620 }
621}
622
623char SIOptimizeVGPRLiveRangeLegacy::ID = 0;
624
625INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRangeLegacy, DEBUG_TYPE,
626 "SI Optimize VGPR LiveRange", false, false)
630INITIALIZE_PASS_END(SIOptimizeVGPRLiveRangeLegacy, DEBUG_TYPE,
632
633char &llvm::SIOptimizeVGPRLiveRangeLegacyID = SIOptimizeVGPRLiveRangeLegacy::ID;
634
636 return new SIOptimizeVGPRLiveRangeLegacy();
637}
638
639bool SIOptimizeVGPRLiveRangeLegacy::runOnMachineFunction(MachineFunction &MF) {
640 if (skipFunction(MF.getFunction()))
641 return false;
642
643 LiveVariables *LV = &getAnalysis<LiveVariablesWrapperPass>().getLV();
645 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
646 MachineLoopInfo *Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
647 return SIOptimizeVGPRLiveRange(LV, MDT, Loops).run(MF);
648}
649
653 MFPropsModifier _(*this, MF);
657
658 bool Changed = SIOptimizeVGPRLiveRange(LV, MDT, Loops).run(MF);
659 if (!Changed)
660 return PreservedAnalyses::all();
661
663 PA.preserve<LiveVariablesAnalysis>();
664 PA.preserve<DominatorTreeAnalysis>();
665 PA.preserve<MachineLoopAnalysis>();
666 PA.preserveSet<CFGAnalyses>();
667 return PA;
668}
669
670bool SIOptimizeVGPRLiveRange::run(MachineFunction &MF) {
671 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
672 TII = ST.getInstrInfo();
673 TRI = &TII->getRegisterInfo();
674 MRI = &MF.getRegInfo();
675
676 bool MadeChange = false;
677
678 // TODO: we need to think about the order of visiting the blocks to get
679 // optimal result for nesting if-else cases.
680 for (MachineBasicBlock &MBB : MF) {
681 for (auto &MI : MBB.terminators()) {
682 // Detect the if-else blocks
683 if (MI.getOpcode() == AMDGPU::SI_IF) {
684 MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
685 auto *Endif = getElseTarget(IfTarget);
686 if (!Endif)
687 continue;
688
689 // Skip unexpected control flow.
690 if (!MDT->dominates(&MBB, IfTarget) || !MDT->dominates(IfTarget, Endif))
691 continue;
692
694 SmallVector<Register> CandidateRegs;
695
696 LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
697 << printMBBReference(MBB) << ' '
698 << printMBBReference(*IfTarget) << ' '
699 << printMBBReference(*Endif) << '\n');
700
701 // Collect all the blocks in the ELSE region
702 collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
703
704 // Collect the registers can be optimized
705 collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
706 CandidateRegs);
707 MadeChange |= !CandidateRegs.empty();
708 // Now we are safe to optimize.
709 for (auto Reg : CandidateRegs)
710 optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
711 } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) {
712 auto *LoopHeader = MI.getOperand(0).getMBB();
713 auto *LoopEnd = &MBB;
714
715 LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
716 << printMBBReference(*LoopHeader) << '\n');
717
718 SmallSetVector<Register, 16> CandidateRegs;
721
722 collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs,
723 Blocks, Instructions);
724 MadeChange |= !CandidateRegs.empty();
725 // Now we are safe to optimize.
726 for (auto Reg : CandidateRegs)
727 optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions);
728 }
729 }
730 }
731
732 return MadeChange;
733}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
Rewrite undef for PHI
MachineBasicBlock & MBB
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Hexagon Hardware Loops
#define _
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:39
Remove Loads Into Fake Uses
Annotate SI Control Flow
#define DEBUG_TYPE
#define LLVM_DEBUG(...)
Definition: Debug.h:119
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:73
A debug info location.
Definition: DebugLoc.h:124
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:284
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:158
An RAII based helper class to modify MachineFunctionProperties when running pass.
unsigned pred_size() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void push_back(MachineInstr *MI)
unsigned succ_size() const
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual MachineFunctionProperties getClearedProperties() const
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
Definition: MachineInstr.h:72
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:359
bool isDebugInstr() const
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
Analysis pass that exposes the MachineLoopInfo for a machine function.
LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
MachineBasicBlock * getMBB() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:85
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:78
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
A vector that has set insertion semantics.
Definition: SetVector.h:59
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:269
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:476
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
bool empty() const
Definition: SmallVector.h:82
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
void set(unsigned Idx)
bool test(unsigned Idx) const
void reset(unsigned Idx)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1157
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
char & SIOptimizeVGPRLiveRangeLegacyID
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
FunctionPass * createSIOptimizeVGPRLiveRangeLegacyPass()
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
VarInfo - This represents the regions where a virtual register is live in the program.
Definition: LiveVariables.h:79
std::vector< MachineInstr * > Kills
Kills - List of MachineInstruction's which are the last use of this virtual register (kill it) in the...
Definition: LiveVariables.h:89
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
Definition: LiveVariables.h:84
LLVM_ABI bool isLiveIn(const MachineBasicBlock &MBB, Register Reg, MachineRegisterInfo &MRI)
isLiveIn - Is Reg live in to MBB? This means that Reg is live through MBB, or it is killed in MBB.