// LLVM 22.0.0git — lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
// (This text block is a doxygen source-listing page header, not program code.)
//===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <numeric>
#include <optional>
#include <utility>

#define DEBUG_TYPE "simple-loop-unswitch"

using namespace llvm;
using namespace llvm::PatternMatch;
73
74STATISTIC(NumBranches, "Number of branches unswitched");
75STATISTIC(NumSwitches, "Number of switches unswitched");
76STATISTIC(NumSelects, "Number of selects turned into branches for unswitching");
77STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
78STATISTIC(NumTrivial, "Number of unswitches that are trivial");
80 NumCostMultiplierSkipped,
81 "Number of unswitch candidates that had their cost multiplier skipped");
82STATISTIC(NumInvariantConditionsInjected,
83 "Number of invariant conditions injected and unswitched");
84
86 "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
87 cl::desc("Forcibly enables non-trivial loop unswitching rather than "
88 "following the configuration passed into the pass."));
89
90static cl::opt<int>
91 UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
92 cl::desc("The cost threshold for unswitching a loop."));
93
95 "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
96 cl::desc("Enable unswitch cost multiplier that prohibits exponential "
97 "explosion in nontrivial unswitch."));
99 "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
100 cl::desc("Toplevel siblings divisor for cost multiplier."));
102 "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
103 cl::desc("Number of unswitch candidates that are ignored when calculating "
104 "cost multiplier."));
106 "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
107 cl::desc("If enabled, simple loop unswitching will also consider "
108 "llvm.experimental.guard intrinsics as unswitch candidates."));
110 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
111 cl::init(false), cl::Hidden,
112 cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
113 "null checks to save time analyzing if we can keep it."));
115 MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
116 cl::desc("Max number of memory uses to explore during "
117 "partial unswitching analysis"),
118 cl::init(100), cl::Hidden);
120 "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
121 cl::desc("If enabled, the freeze instruction will be added to condition "
122 "of loop unswitch to prevent miscompilation."));
123
125 "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden,
126 cl::desc("Whether we should inject new invariants and unswitch them to "
127 "eliminate some existing (non-invariant) conditions."),
128 cl::init(true));
129
131 "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
132 cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
133 "unswitch on them to eliminate branches that are "
134 "not-taken 1/<this option> times or less."),
135 cl::init(16));
136
138namespace {
139struct CompareDesc {
140 BranchInst *Term;
141 Value *Invariant;
142 BasicBlock *InLoopSucc;
143
144 CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
145 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
146};
147
148struct InjectedInvariant {
150 Value *LHS;
151 Value *RHS;
152 BasicBlock *InLoopSucc;
153
154 InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
155 BasicBlock *InLoopSucc)
156 : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
157};
158
159struct NonTrivialUnswitchCandidate {
160 Instruction *TI = nullptr;
161 TinyPtrVector<Value *> Invariants;
162 std::optional<InstructionCost> Cost;
163 std::optional<InjectedInvariant> PendingInjection;
164 NonTrivialUnswitchCandidate(
165 Instruction *TI, ArrayRef<Value *> Invariants,
166 std::optional<InstructionCost> Cost = std::nullopt,
167 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
168 : TI(TI), Invariants(Invariants), Cost(Cost),
169 PendingInjection(PendingInjection) {};
170
171 bool hasPendingInjection() const { return PendingInjection.has_value(); }
172};
173} // end anonymous namespace.
174
175// Helper to skip (select x, true, false), which matches both a logical AND and
176// OR and can confuse code that tries to determine if \p Cond is either a
177// logical AND or OR but not both.
179 Value *CondNext;
180 while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
181 Cond = CondNext;
182 return Cond;
183}
184
185/// Collect all of the loop invariant input values transitively used by the
186/// homogeneous instruction graph from a given root.
187///
188/// This essentially walks from a root recursively through loop variant operands
189/// which have perform the same logical operation (AND or OR) and finds all
190/// inputs which are loop invariant. For some operations these can be
191/// re-associated and unswitched out of the loop entirely.
194 const LoopInfo &LI) {
195 assert(!L.isLoopInvariant(&Root) &&
196 "Only need to walk the graph if root itself is not invariant.");
197 TinyPtrVector<Value *> Invariants;
198
199 bool IsRootAnd = match(&Root, m_LogicalAnd());
200 bool IsRootOr = match(&Root, m_LogicalOr());
201
202 // Build a worklist and recurse through operators collecting invariants.
205 Worklist.push_back(&Root);
206 Visited.insert(&Root);
207 do {
208 Instruction &I = *Worklist.pop_back_val();
209 for (Value *OpV : I.operand_values()) {
210 // Skip constants as unswitching isn't interesting for them.
211 if (isa<Constant>(OpV))
212 continue;
213
214 // Add it to our result if loop invariant.
215 if (L.isLoopInvariant(OpV)) {
216 Invariants.push_back(OpV);
217 continue;
218 }
219
220 // If not an instruction with the same opcode, nothing we can do.
221 Instruction *OpI = dyn_cast<Instruction>(skipTrivialSelect(OpV));
222
223 if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
224 (IsRootOr && match(OpI, m_LogicalOr())))) {
225 // Visit this operand.
226 if (Visited.insert(OpI).second)
227 Worklist.push_back(OpI);
228 }
229 }
230 } while (!Worklist.empty());
231
232 return Invariants;
233}
234
235static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
236 Constant &Replacement) {
237 assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
238
239 // Replace uses of LIC in the loop with the given constant.
240 // We use make_early_inc_range as set invalidates the iterator.
241 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
242 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
243
244 // Replace this use within the loop body.
245 if (UserI && L.contains(UserI))
246 U.set(&Replacement);
247 }
248}
249
250/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
251/// incoming values along this edge.
253 const BasicBlock &ExitingBB,
254 const BasicBlock &ExitBB) {
255 for (const Instruction &I : ExitBB) {
256 auto *PN = dyn_cast<PHINode>(&I);
257 if (!PN)
258 // No more PHIs to check.
259 return true;
260
261 // If the incoming value for this edge isn't loop invariant the unswitch
262 // won't be trivial.
263 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
264 return false;
265 }
266 llvm_unreachable("Basic blocks should never be empty!");
267}
268
269/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
270/// end of \p BB and conditionally branch on the copied condition. We only
271/// branch on a single value.
273 BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
274 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
275 const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
276 IRBuilder<> IRB(&BB);
278
279 SmallVector<Value *> FrozenInvariants;
280 for (Value *Inv : Invariants) {
281 if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
282 Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
283 FrozenInvariants.push_back(Inv);
284 }
285
286 Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
287 : IRB.CreateAnd(FrozenInvariants);
288 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
289 Direction ? &NormalSucc : &UnswitchedSucc);
290}
291
292/// Copy a set of loop invariant values, and conditionally branch on them.
294 BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
295 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
296 MemorySSAUpdater *MSSAU) {
298 for (auto *Val : reverse(ToDuplicate)) {
299 Instruction *Inst = cast<Instruction>(Val);
300 Instruction *NewInst = Inst->clone();
301
302 if (const DebugLoc &DL = Inst->getDebugLoc())
303 mapAtomInstance(DL, VMap);
304
305 NewInst->insertInto(&BB, BB.end());
306 RemapInstruction(NewInst, VMap,
308 VMap[Val] = NewInst;
309
310 if (!MSSAU)
311 continue;
312
313 MemorySSA *MSSA = MSSAU->getMemorySSA();
314 if (auto *MemUse =
315 dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
316 auto *DefiningAccess = MemUse->getDefiningAccess();
317 // Get the first defining access before the loop.
318 while (L.contains(DefiningAccess->getBlock())) {
319 // If the defining access is a MemoryPhi, get the incoming
320 // value for the pre-header as defining access.
321 if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
322 DefiningAccess =
323 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
324 else
325 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
326 }
327 MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
328 NewInst->getParent(),
330 }
331 }
332
333 IRBuilder<> IRB(&BB);
335 Value *Cond = VMap[ToDuplicate[0]];
336 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
337 Direction ? &NormalSucc : &UnswitchedSucc);
338}
339
340/// Rewrite the PHI nodes in an unswitched loop exit basic block.
341///
342/// Requires that the loop exit and unswitched basic block are the same, and
343/// that the exiting block was a unique predecessor of that block. Rewrites the
344/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
345/// PHI nodes from the old preheader that now contains the unswitched
346/// terminator.
348 BasicBlock &OldExitingBB,
349 BasicBlock &OldPH) {
350 for (PHINode &PN : UnswitchedBB.phis()) {
351 // When the loop exit is directly unswitched we just need to update the
352 // incoming basic block. We loop to handle weird cases with repeated
353 // incoming blocks, but expect to typically only have one operand here.
354 for (auto i : seq<int>(0, PN.getNumOperands())) {
355 assert(PN.getIncomingBlock(i) == &OldExitingBB &&
356 "Found incoming block different from unique predecessor!");
357 PN.setIncomingBlock(i, &OldPH);
358 }
359 }
360}
361
362/// Rewrite the PHI nodes in the loop exit basic block and the split off
363/// unswitched block.
364///
365/// Because the exit block remains an exit from the loop, this rewrites the
366/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
367/// nodes into the unswitched basic block to select between the value in the
368/// old preheader and the loop exit.
370 BasicBlock &UnswitchedBB,
371 BasicBlock &OldExitingBB,
372 BasicBlock &OldPH,
373 bool FullUnswitch) {
374 assert(&ExitBB != &UnswitchedBB &&
375 "Must have different loop exit and unswitched blocks!");
376 BasicBlock::iterator InsertPt = UnswitchedBB.begin();
377 for (PHINode &PN : ExitBB.phis()) {
378 auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
379 PN.getName() + ".split");
380 NewPN->insertBefore(InsertPt);
381
382 // Walk backwards over the old PHI node's inputs to minimize the cost of
383 // removing each one. We have to do this weird loop manually so that we
384 // create the same number of new incoming edges in the new PHI as we expect
385 // each case-based edge to be included in the unswitched switch in some
386 // cases.
387 // FIXME: This is really, really gross. It would be much cleaner if LLVM
388 // allowed us to create a single entry for a predecessor block without
389 // having separate entries for each "edge" even though these edges are
390 // required to produce identical results.
391 for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
392 if (PN.getIncomingBlock(i) != &OldExitingBB)
393 continue;
394
395 Value *Incoming = PN.getIncomingValue(i);
396 if (FullUnswitch)
397 // No more edge from the old exiting block to the exit block.
398 PN.removeIncomingValue(i);
399
400 NewPN->addIncoming(Incoming, &OldPH);
401 }
402
403 // Now replace the old PHI with the new one and wire the old one in as an
404 // input to the new one.
405 PN.replaceAllUsesWith(NewPN);
406 NewPN->addIncoming(&PN, &ExitBB);
407 }
408}
409
410/// Hoist the current loop up to the innermost loop containing a remaining exit.
411///
412/// Because we've removed an exit from the loop, we may have changed the set of
413/// loops reachable and need to move the current loop up the loop nest or even
414/// to an entirely separate nest.
415static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
416 DominatorTree &DT, LoopInfo &LI,
417 MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
418 // If the loop is already at the top level, we can't hoist it anywhere.
419 Loop *OldParentL = L.getParentLoop();
420 if (!OldParentL)
421 return;
422
424 L.getExitBlocks(Exits);
425 Loop *NewParentL = nullptr;
426 for (auto *ExitBB : Exits)
427 if (Loop *ExitL = LI.getLoopFor(ExitBB))
428 if (!NewParentL || NewParentL->contains(ExitL))
429 NewParentL = ExitL;
430
431 if (NewParentL == OldParentL)
432 return;
433
434 // The new parent loop (if different) should always contain the old one.
435 if (NewParentL)
436 assert(NewParentL->contains(OldParentL) &&
437 "Can only hoist this loop up the nest!");
438
439 // The preheader will need to move with the body of this loop. However,
440 // because it isn't in this loop we also need to update the primary loop map.
441 assert(OldParentL == LI.getLoopFor(&Preheader) &&
442 "Parent loop of this loop should contain this loop's preheader!");
443 LI.changeLoopFor(&Preheader, NewParentL);
444
445 // Remove this loop from its old parent.
446 OldParentL->removeChildLoop(&L);
447
448 // Add the loop either to the new parent or as a top-level loop.
449 if (NewParentL)
450 NewParentL->addChildLoop(&L);
451 else
452 LI.addTopLevelLoop(&L);
453
454 // Remove this loops blocks from the old parent and every other loop up the
455 // nest until reaching the new parent. Also update all of these
456 // no-longer-containing loops to reflect the nesting change.
457 for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
458 OldContainingL = OldContainingL->getParentLoop()) {
459 llvm::erase_if(OldContainingL->getBlocksVector(),
460 [&](const BasicBlock *BB) {
461 return BB == &Preheader || L.contains(BB);
462 });
463
464 OldContainingL->getBlocksSet().erase(&Preheader);
465 for (BasicBlock *BB : L.blocks())
466 OldContainingL->getBlocksSet().erase(BB);
467
468 // Because we just hoisted a loop out of this one, we have essentially
469 // created new exit paths from it. That means we need to form LCSSA PHI
470 // nodes for values used in the no-longer-nested loop.
471 formLCSSA(*OldContainingL, DT, &LI, SE);
472
473 // We shouldn't need to form dedicated exits because the exit introduced
474 // here is the (just split by unswitching) preheader. However, after trivial
475 // unswitching it is possible to get new non-dedicated exits out of parent
476 // loop so let's conservatively form dedicated exit blocks and figure out
477 // if we can optimize later.
478 formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
479 /*PreserveLCSSA*/ true);
480 }
481}
482
483// Return the top-most loop containing ExitBB and having ExitBB as exiting block
484// or the loop containing ExitBB, if there is no parent loop containing ExitBB
485// as exiting block.
487 const LoopInfo &LI) {
488 Loop *TopMost = LI.getLoopFor(ExitBB);
489 Loop *Current = TopMost;
490 while (Current) {
491 if (Current->isLoopExiting(ExitBB))
492 TopMost = Current;
493 Current = Current->getParentLoop();
494 }
495 return TopMost;
496}
497
498/// Unswitch a trivial branch if the condition is loop invariant.
499///
500/// This routine should only be called when loop code leading to the branch has
501/// been validated as trivial (no side effects). This routine checks if the
502/// condition is invariant and one of the successors is a loop exit. This
503/// allows us to unswitch without duplicating the loop, making it trivial.
504///
505/// If this routine fails to unswitch the branch it returns false.
506///
507/// If the branch can be unswitched, this routine splits the preheader and
508/// hoists the branch above that split. Preserves loop simplified form
509/// (splitting the exit block as necessary). It simplifies the branch within
510/// the loop to an unconditional branch but doesn't remove it entirely. Further
511/// cleanup can be done with some simplifycfg like pass.
512///
513/// If `SE` is not null, it will be updated based on the potential loop SCEVs
514/// invalidated by this.
516 LoopInfo &LI, ScalarEvolution *SE,
517 MemorySSAUpdater *MSSAU) {
518 assert(BI.isConditional() && "Can only unswitch a conditional branch!");
519 LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
520
521 // The loop invariant values that we want to unswitch.
522 TinyPtrVector<Value *> Invariants;
523
524 // When true, we're fully unswitching the branch rather than just unswitching
525 // some input conditions to the branch.
526 bool FullUnswitch = false;
527
529 if (L.isLoopInvariant(Cond)) {
530 Invariants.push_back(Cond);
531 FullUnswitch = true;
532 } else {
533 if (auto *CondInst = dyn_cast<Instruction>(Cond))
534 Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
535 if (Invariants.empty()) {
536 LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
537 return false;
538 }
539 }
540
541 // Check that one of the branch's successors exits, and which one.
542 bool ExitDirection = true;
543 int LoopExitSuccIdx = 0;
544 auto *LoopExitBB = BI.getSuccessor(0);
545 if (L.contains(LoopExitBB)) {
546 ExitDirection = false;
547 LoopExitSuccIdx = 1;
548 LoopExitBB = BI.getSuccessor(1);
549 if (L.contains(LoopExitBB)) {
550 LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
551 return false;
552 }
553 }
554 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
555 auto *ParentBB = BI.getParent();
556 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
557 LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
558 return false;
559 }
560
561 // When unswitching only part of the branch's condition, we need the exit
562 // block to be reached directly from the partially unswitched input. This can
563 // be done when the exit block is along the true edge and the branch condition
564 // is a graph of `or` operations, or the exit block is along the false edge
565 // and the condition is a graph of `and` operations.
566 if (!FullUnswitch) {
567 if (ExitDirection ? !match(Cond, m_LogicalOr())
568 : !match(Cond, m_LogicalAnd())) {
569 LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
570 "non-full unswitch!\n");
571 return false;
572 }
573 }
574
575 LLVM_DEBUG({
576 dbgs() << " unswitching trivial invariant conditions for: " << BI
577 << "\n";
578 for (Value *Invariant : Invariants) {
579 dbgs() << " " << *Invariant << " == true";
580 if (Invariant != Invariants.back())
581 dbgs() << " ||";
582 dbgs() << "\n";
583 }
584 });
585
586 // If we have scalar evolutions, we need to invalidate them including this
587 // loop, the loop containing the exit block and the topmost parent loop
588 // exiting via LoopExitBB.
589 if (SE) {
590 if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
591 SE->forgetLoop(ExitL);
592 else
593 // Forget the entire nest as this exits the entire nest.
594 SE->forgetTopmostLoop(&L);
596 }
597
598 if (MSSAU && VerifyMemorySSA)
599 MSSAU->getMemorySSA()->verifyMemorySSA();
600
601 // Split the preheader, so that we know that there is a safe place to insert
602 // the conditional branch. We will change the preheader to have a conditional
603 // branch on LoopCond.
604 BasicBlock *OldPH = L.getLoopPreheader();
605 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
606
607 // Now that we have a place to insert the conditional branch, create a place
608 // to branch to: this is the exit block out of the loop that we are
609 // unswitching. We need to split this if there are other loop predecessors.
610 // Because the loop is in simplified form, *any* other predecessor is enough.
611 BasicBlock *UnswitchedBB;
612 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
613 assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
614 "A branch's parent isn't a predecessor!");
615 UnswitchedBB = LoopExitBB;
616 } else {
617 UnswitchedBB =
618 SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU, "", false);
619 }
620
621 if (MSSAU && VerifyMemorySSA)
622 MSSAU->getMemorySSA()->verifyMemorySSA();
623
624 // Actually move the invariant uses into the unswitched position. If possible,
625 // we do this by moving the instructions, but when doing partial unswitching
626 // we do it by building a new merge of the values in the unswitched position.
627 OldPH->getTerminator()->eraseFromParent();
628 if (FullUnswitch) {
629 // If fully unswitching, we can use the existing branch instruction.
630 // Splice it into the old PH to gate reaching the new preheader and re-point
631 // its successors.
632 BI.moveBefore(*OldPH, OldPH->end());
633 BI.setCondition(Cond);
634 if (MSSAU) {
635 // Temporarily clone the terminator, to make MSSA update cheaper by
636 // separating "insert edge" updates from "remove edge" ones.
637 BI.clone()->insertInto(ParentBB, ParentBB->end());
638 } else {
639 // Create a new unconditional branch that will continue the loop as a new
640 // terminator.
641 Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB);
642 NewBI->setDebugLoc(BI.getDebugLoc());
643 }
644 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
645 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
646 } else {
647 // Only unswitching a subset of inputs to the condition, so we will need to
648 // build a new branch that merges the invariant inputs.
649 if (ExitDirection)
651 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
652 "condition!");
653 else
655 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
656 " condition!");
658 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
659 FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
660 }
661
662 // Update the dominator tree with the added edge.
663 DT.insertEdge(OldPH, UnswitchedBB);
664
665 // After the dominator tree was updated with the added edge, update MemorySSA
666 // if available.
667 if (MSSAU) {
669 Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
670 MSSAU->applyInsertUpdates(Updates, DT);
671 }
672
673 // Finish updating dominator tree and memory ssa for full unswitch.
674 if (FullUnswitch) {
675 if (MSSAU) {
676 Instruction *Term = ParentBB->getTerminator();
677 // Remove the cloned branch instruction and create unconditional branch
678 // now.
679 Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB);
680 NewBI->setDebugLoc(Term->getDebugLoc());
681 Term->eraseFromParent();
682 MSSAU->removeEdge(ParentBB, LoopExitBB);
683 }
684 DT.deleteEdge(ParentBB, LoopExitBB);
685 }
686
687 if (MSSAU && VerifyMemorySSA)
688 MSSAU->getMemorySSA()->verifyMemorySSA();
689
690 // Rewrite the relevant PHI nodes.
691 if (UnswitchedBB == LoopExitBB)
692 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
693 else
694 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
695 *ParentBB, *OldPH, FullUnswitch);
696
697 // The constant we can replace all of our invariants with inside the loop
698 // body. If any of the invariants have a value other than this the loop won't
699 // be entered.
700 ConstantInt *Replacement = ExitDirection
703
704 // Since this is an i1 condition we can also trivially replace uses of it
705 // within the loop with a constant.
706 for (Value *Invariant : Invariants)
707 replaceLoopInvariantUses(L, Invariant, *Replacement);
708
709 // If this was full unswitching, we may have changed the nesting relationship
710 // for this loop so hoist it to its correct parent if needed.
711 if (FullUnswitch)
712 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
713
714 if (MSSAU && VerifyMemorySSA)
715 MSSAU->getMemorySSA()->verifyMemorySSA();
716
717 LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
718 ++NumTrivial;
719 ++NumBranches;
720 return true;
721}
722
723/// Unswitch a trivial switch if the condition is loop invariant.
724///
725/// This routine should only be called when loop code leading to the switch has
726/// been validated as trivial (no side effects). This routine checks if the
727/// condition is invariant and that at least one of the successors is a loop
728/// exit. This allows us to unswitch without duplicating the loop, making it
729/// trivial.
730///
731/// If this routine fails to unswitch the switch it returns false.
732///
733/// If the switch can be unswitched, this routine splits the preheader and
734/// copies the switch above that split. If the default case is one of the
735/// exiting cases, it copies the non-exiting cases and points them at the new
736/// preheader. If the default case is not exiting, it copies the exiting cases
737/// and points the default at the preheader. It preserves loop simplified form
738/// (splitting the exit blocks as necessary). It simplifies the switch within
739/// the loop by removing now-dead cases. If the default case is one of those
740/// unswitched, it replaces its destination with a new basic block containing
741/// only unreachable. Such basic blocks, while technically loop exits, are not
742/// considered for unswitching so this is a stable transform and the same
743/// switch will not be revisited. If after unswitching there is only a single
744/// in-loop successor, the switch is further simplified to an unconditional
745/// branch. Still more cleanup can be done with some simplifycfg like pass.
746///
747/// If `SE` is not null, it will be updated based on the potential loop SCEVs
748/// invalidated by this.
750 LoopInfo &LI, ScalarEvolution *SE,
751 MemorySSAUpdater *MSSAU) {
752 LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
753 Value *LoopCond = SI.getCondition();
754
755 // If this isn't switching on an invariant condition, we can't unswitch it.
756 if (!L.isLoopInvariant(LoopCond))
757 return false;
758
759 auto *ParentBB = SI.getParent();
760
761 // The same check must be used both for the default and the exit cases. We
762 // should never leave edges from the switch instruction to a basic block that
763 // we are unswitching, hence the condition used to determine the default case
764 // needs to also be used to populate ExitCaseIndices, which is then used to
765 // remove cases from the switch.
766 auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
767 // BBToCheck is not an exit block if it is inside loop L.
768 if (L.contains(&BBToCheck))
769 return false;
770 // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
771 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
772 return false;
773 // We do not unswitch a block that only has an unreachable statement, as
774 // it's possible this is a previously unswitched block. Only unswitch if
775 // either the terminator is not unreachable, or, if it is, it's not the only
776 // instruction in the block.
777 auto *TI = BBToCheck.getTerminator();
778 bool isUnreachable = isa<UnreachableInst>(TI);
779 return !isUnreachable || &*BBToCheck.getFirstNonPHIOrDbg() != TI;
780 };
781
782 SmallVector<int, 4> ExitCaseIndices;
783 for (auto Case : SI.cases())
784 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
785 ExitCaseIndices.push_back(Case.getCaseIndex());
786 BasicBlock *DefaultExitBB = nullptr;
789 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
790 DefaultExitBB = SI.getDefaultDest();
791 } else if (ExitCaseIndices.empty())
792 return false;
793
794 LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
795
796 if (MSSAU && VerifyMemorySSA)
797 MSSAU->getMemorySSA()->verifyMemorySSA();
798
799 // We may need to invalidate SCEVs for the outermost loop reached by any of
800 // the exits.
801 Loop *OuterL = &L;
802
803 if (DefaultExitBB) {
804 // Check the loop containing this exit.
805 Loop *ExitL = getTopMostExitingLoop(DefaultExitBB, LI);
806 if (!ExitL || ExitL->contains(OuterL))
807 OuterL = ExitL;
808 }
809 for (unsigned Index : ExitCaseIndices) {
810 auto CaseI = SI.case_begin() + Index;
811 // Compute the outer loop from this exit.
812 Loop *ExitL = getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI);
813 if (!ExitL || ExitL->contains(OuterL))
814 OuterL = ExitL;
815 }
816
817 if (SE) {
818 if (OuterL)
819 SE->forgetLoop(OuterL);
820 else
821 SE->forgetTopmostLoop(&L);
822 }
823
824 if (DefaultExitBB) {
825 // Clear out the default destination temporarily to allow accurate
826 // predecessor lists to be examined below.
827 SI.setDefaultDest(nullptr);
828 }
829
830 // Store the exit cases into a separate data structure and remove them from
831 // the switch.
832 SmallVector<std::tuple<ConstantInt *, BasicBlock *,
834 4> ExitCases;
835 ExitCases.reserve(ExitCaseIndices.size());
837 // We walk the case indices backwards so that we remove the last case first
838 // and don't disrupt the earlier indices.
839 for (unsigned Index : reverse(ExitCaseIndices)) {
840 auto CaseI = SI.case_begin() + Index;
841 // Save the value of this case.
842 auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
843 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
844 // Delete the unswitched cases.
845 SIW.removeCase(CaseI);
846 }
847
848 // Check if after this all of the remaining cases point at the same
849 // successor.
850 BasicBlock *CommonSuccBB = nullptr;
851 if (SI.getNumCases() > 0 &&
852 all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
853 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
854 }))
855 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
856 if (!DefaultExitBB) {
857 // If we're not unswitching the default, we need it to match any cases to
858 // have a common successor or if we have no cases it is the common
859 // successor.
860 if (SI.getNumCases() == 0)
861 CommonSuccBB = SI.getDefaultDest();
862 else if (SI.getDefaultDest() != CommonSuccBB)
863 CommonSuccBB = nullptr;
864 }
865
866 // Split the preheader, so that we know that there is a safe place to insert
867 // the switch.
868 BasicBlock *OldPH = L.getLoopPreheader();
869 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
870 OldPH->getTerminator()->eraseFromParent();
871
872 // Now add the unswitched switch. This new switch instruction inherits the
873 // debug location of the old switch, because it semantically replace the old
874 // one.
875 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
876 NewSI->setDebugLoc(SIW->getDebugLoc());
877 SwitchInstProfUpdateWrapper NewSIW(*NewSI);
878
879 // Rewrite the IR for the unswitched basic blocks. This requires two steps.
880 // First, we split any exit blocks with remaining in-loop predecessors. Then
881 // we update the PHIs in one of two ways depending on if there was a split.
882 // We walk in reverse so that we split in the same order as the cases
883 // appeared. This is purely for convenience of reading the resulting IR, but
884 // it doesn't cost anything really.
885 SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
887 // Handle the default exit if necessary.
888 // FIXME: It'd be great if we could merge this with the loop below but LLVM's
889 // ranges aren't quite powerful enough yet.
890 if (DefaultExitBB) {
891 if (pred_empty(DefaultExitBB)) {
892 UnswitchedExitBBs.insert(DefaultExitBB);
893 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
894 } else {
895 auto *SplitBB =
896 SplitBlock(DefaultExitBB, DefaultExitBB->begin(), &DT, &LI, MSSAU);
897 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
898 *ParentBB, *OldPH,
899 /*FullUnswitch*/ true);
900 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
901 }
902 }
903 // Note that we must use a reference in the for loop so that we update the
904 // container.
905 for (auto &ExitCase : reverse(ExitCases)) {
906 // Grab a reference to the exit block in the pair so that we can update it.
907 BasicBlock *ExitBB = std::get<1>(ExitCase);
908
909 // If this case is the last edge into the exit block, we can simply reuse it
910 // as it will no longer be a loop exit. No mapping necessary.
911 if (pred_empty(ExitBB)) {
912 // Only rewrite once.
913 if (UnswitchedExitBBs.insert(ExitBB).second)
914 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
915 continue;
916 }
917
918 // Otherwise we need to split the exit block so that we retain an exit
919 // block from the loop and a target for the unswitched condition.
920 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
921 if (!SplitExitBB) {
922 // If this is the first time we see this, do the split and remember it.
923 SplitExitBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
924 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
925 *ParentBB, *OldPH,
926 /*FullUnswitch*/ true);
927 }
928 // Update the case pair to point to the split block.
929 std::get<1>(ExitCase) = SplitExitBB;
930 }
931
932 // Now add the unswitched cases. We do this in reverse order as we built them
933 // in reverse order.
934 for (auto &ExitCase : reverse(ExitCases)) {
935 ConstantInt *CaseVal = std::get<0>(ExitCase);
936 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
937
938 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
939 }
940
941 // If the default was unswitched, re-point it and add explicit cases for
942 // entering the loop.
943 if (DefaultExitBB) {
944 NewSIW->setDefaultDest(DefaultExitBB);
945 NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
946
947 // We removed all the exit cases, so we just copy the cases to the
948 // unswitched switch.
949 for (const auto &Case : SI.cases())
950 NewSIW.addCase(Case.getCaseValue(), NewPH,
952 } else if (DefaultCaseWeight) {
953 // We have to set branch weight of the default case.
954 uint64_t SW = *DefaultCaseWeight;
955 for (const auto &Case : SI.cases()) {
956 auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
957 assert(W &&
958 "case weight must be defined as default case weight is defined");
959 SW += *W;
960 }
961 NewSIW.setSuccessorWeight(0, SW);
962 }
963
964 // If we ended up with a common successor for every path through the switch
965 // after unswitching, rewrite it to an unconditional branch to make it easy
966 // to recognize. Otherwise we potentially have to recognize the default case
967 // pointing at unreachable and other complexity.
968 if (CommonSuccBB) {
969 BasicBlock *BB = SI.getParent();
970 // We may have had multiple edges to this common successor block, so remove
971 // them as predecessors. We skip the first one, either the default or the
972 // actual first case.
973 bool SkippedFirst = DefaultExitBB == nullptr;
974 for (auto Case : SI.cases()) {
975 assert(Case.getCaseSuccessor() == CommonSuccBB &&
976 "Non-common successor!");
977 (void)Case;
978 if (!SkippedFirst) {
979 SkippedFirst = true;
980 continue;
981 }
982 CommonSuccBB->removePredecessor(BB,
983 /*KeepOneInputPHIs*/ true);
984 }
985 // Now nuke the switch and replace it with a direct branch.
986 Instruction *NewBI = BranchInst::Create(CommonSuccBB, BB);
987 NewBI->setDebugLoc(SIW->getDebugLoc());
988 SIW.eraseFromParent();
989 } else if (DefaultExitBB) {
990 assert(SI.getNumCases() > 0 &&
991 "If we had no cases we'd have a common successor!");
992 // Move the last case to the default successor. This is valid as if the
993 // default got unswitched it cannot be reached. This has the advantage of
994 // being simple and keeping the number of edges from this switch to
995 // successors the same, and avoiding any PHI update complexity.
996 auto LastCaseI = std::prev(SI.case_end());
997
998 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
1000 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
1001 SIW.removeCase(LastCaseI);
1002 }
1003
1004 // Walk the unswitched exit blocks and the unswitched split blocks and update
1005 // the dominator tree based on the CFG edits. While we are walking unordered
1006 // containers here, the API for applyUpdates takes an unordered list of
1007 // updates and requires them to not contain duplicates.
1009 for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
1010 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
1011 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
1012 }
1013 for (auto SplitUnswitchedPair : SplitExitBBMap) {
1014 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
1015 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
1016 }
1017
1018 if (MSSAU) {
1019 MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
1020 if (VerifyMemorySSA)
1021 MSSAU->getMemorySSA()->verifyMemorySSA();
1022 } else {
1023 DT.applyUpdates(DTUpdates);
1024 }
1025
1026 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1027
1028 // We may have changed the nesting relationship for this loop so hoist it to
1029 // its correct parent if needed.
1030 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
1031
1032 if (MSSAU && VerifyMemorySSA)
1033 MSSAU->getMemorySSA()->verifyMemorySSA();
1034
1035 ++NumTrivial;
1036 ++NumSwitches;
1037 LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
1038 return true;
1039}
1040
1041/// This routine scans the loop to find a branch or switch which occurs before
1042/// any side effects occur. These can potentially be unswitched without
1043/// duplicating the loop. If a branch or switch is successfully unswitched the
1044/// scanning continues to see if subsequent branches or switches have become
1045/// trivial. Once all trivial candidates have been unswitched, this routine
1046/// returns.
1047///
1048/// The return value indicates whether anything was unswitched (and therefore
1049/// changed).
1050///
1051/// If `SE` is not null, it will be updated based on the potential loop SCEVs
1052/// invalidated by this.
1054 LoopInfo &LI, ScalarEvolution *SE,
1055 MemorySSAUpdater *MSSAU) {
1056 bool Changed = false;
1057
1058 // If loop header has only one reachable successor we should keep looking for
1059 // trivial condition candidates in the successor as well. An alternative is
1060 // to constant fold conditions and merge successors into loop header (then we
1061 // only need to check header's terminator). The reason for not doing this in
1062 // LoopUnswitch pass is that it could potentially break LoopPassManager's
1063 // invariants. Folding dead branches could either eliminate the current loop
1064 // or make other loops unreachable. LCSSA form might also not be preserved
1065 // after deleting branches. The following code keeps traversing loop header's
1066 // successors until it finds the trivial condition candidate (condition that
1067 // is not a constant). Since unswitching generates branches with constant
1068 // conditions, this scenario could be very common in practice.
1069 BasicBlock *CurrentBB = L.getHeader();
1071 Visited.insert(CurrentBB);
1072 do {
1073 // Check if there are any side-effecting instructions (e.g. stores, calls,
1074 // volatile loads) in the part of the loop that the code *would* execute
1075 // without unswitching.
1076 if (MSSAU) // Possible early exit with MSSA
1077 if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
1078 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1079 return Changed;
1080 if (llvm::any_of(*CurrentBB,
1081 [](Instruction &I) { return I.mayHaveSideEffects(); }))
1082 return Changed;
1083
1084 Instruction *CurrentTerm = CurrentBB->getTerminator();
1085
1086 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1087 // Don't bother trying to unswitch past a switch with a constant
1088 // condition. This should be removed prior to running this pass by
1089 // simplifycfg.
1090 if (isa<Constant>(SI->getCondition()))
1091 return Changed;
1092
1093 if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
1094 // Couldn't unswitch this one so we're done.
1095 return Changed;
1096
1097 // Mark that we managed to unswitch something.
1098 Changed = true;
1099
1100 // If unswitching turned the terminator into an unconditional branch then
1101 // we can continue. The unswitching logic specifically works to fold any
1102 // cases it can into an unconditional branch to make it easier to
1103 // recognize here.
1104 auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1105 if (!BI || BI->isConditional())
1106 return Changed;
1107
1108 CurrentBB = BI->getSuccessor(0);
1109 continue;
1110 }
1111
1112 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1113 if (!BI)
1114 // We do not understand other terminator instructions.
1115 return Changed;
1116
1117 // Don't bother trying to unswitch past an unconditional branch or a branch
1118 // with a constant value. These should be removed by simplifycfg prior to
1119 // running this pass.
1120 if (!BI->isConditional() ||
1121 isa<Constant>(skipTrivialSelect(BI->getCondition())))
1122 return Changed;
1123
1124 // Found a trivial condition candidate: non-foldable conditional branch. If
1125 // we fail to unswitch this, we can't do anything else that is trivial.
1126 if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1127 return Changed;
1128
1129 // Mark that we managed to unswitch something.
1130 Changed = true;
1131
1132 // If we only unswitched some of the conditions feeding the branch, we won't
1133 // have collapsed it to a single successor.
1134 BI = cast<BranchInst>(CurrentBB->getTerminator());
1135 if (BI->isConditional())
1136 return Changed;
1137
1138 // Follow the newly unconditional branch into its successor.
1139 CurrentBB = BI->getSuccessor(0);
1140
1141 // When continuing, if we exit the loop or reach a previous visited block,
1142 // then we can not reach any trivial condition candidates (unfoldable
1143 // branch instructions or switch instructions) and no unswitch can happen.
1144 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1145
1146 return Changed;
1147}
1148
1149/// Build the cloned blocks for an unswitched copy of the given loop.
1150///
1151/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1152/// after the split block (`SplitBB`) that will be used to select between the
1153/// cloned and original loop.
1154///
1155/// This routine handles cloning all of the necessary loop blocks and exit
1156/// blocks including rewriting their instructions and the relevant PHI nodes.
1157/// Any loop blocks or exit blocks which are dominated by a different successor
1158/// than the one for this clone of the loop blocks can be trivially skipped. We
1159/// use the `DominatingSucc` map to determine whether a block satisfies that
1160/// property with a simple map lookup.
1161///
1162/// It also correctly creates the unconditional branch in the cloned
1163/// unswitched parent block to only point at the unswitched successor.
1164///
1165/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1166/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1167/// the cloned blocks (and their loops) are left without full `LoopInfo`
1168/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1169/// blocks to them but doesn't create the cloned `DominatorTree` structure and
1170/// instead the caller must recompute an accurate DT. It *does* correctly
1171/// update the `AssumptionCache` provided in `AC`.
1173 Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1174 ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1175 BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1177 ValueToValueMapTy &VMap,
1179 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU,
1180 ScalarEvolution *SE) {
1182 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1183
1184 // We will need to clone a bunch of blocks, wrap up the clone operation in
1185 // a helper.
1186 auto CloneBlock = [&](BasicBlock *OldBB) {
1187 // Clone the basic block and insert it before the new preheader.
1188 BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1189 NewBB->moveBefore(LoopPH);
1190
1191 // Record this block and the mapping.
1192 NewBlocks.push_back(NewBB);
1193 VMap[OldBB] = NewBB;
1194
1195 return NewBB;
1196 };
1197
1198 // We skip cloning blocks when they have a dominating succ that is not the
1199 // succ we are cloning for.
1200 auto SkipBlock = [&](BasicBlock *BB) {
1201 auto It = DominatingSucc.find(BB);
1202 return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1203 };
1204
1205 // First, clone the preheader.
1206 auto *ClonedPH = CloneBlock(LoopPH);
1207
1208 // Then clone all the loop blocks, skipping the ones that aren't necessary.
1209 for (auto *LoopBB : L.blocks())
1210 if (!SkipBlock(LoopBB))
1211 CloneBlock(LoopBB);
1212
1213 // Split all the loop exit edges so that when we clone the exit blocks, if
1214 // any of the exit blocks are *also* a preheader for some other loop, we
1215 // don't create multiple predecessors entering the loop header.
1216 for (auto *ExitBB : ExitBlocks) {
1217 if (SkipBlock(ExitBB))
1218 continue;
1219
1220 // When we are going to clone an exit, we don't need to clone all the
1221 // instructions in the exit block and we want to ensure we have an easy
1222 // place to merge the CFG, so split the exit first. This is always safe to
1223 // do because there cannot be any non-loop predecessors of a loop exit in
1224 // loop simplified form.
1225 auto *MergeBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1226
1227 // Rearrange the names to make it easier to write test cases by having the
1228 // exit block carry the suffix rather than the merge block carrying the
1229 // suffix.
1230 MergeBB->takeName(ExitBB);
1231 ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1232
1233 // Now clone the original exit block.
1234 auto *ClonedExitBB = CloneBlock(ExitBB);
1235 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1236 "Exit block should have been split to have one successor!");
1237 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1238 "Cloned exit block has the wrong successor!");
1239
1240 // Remap any cloned instructions and create a merge phi node for them.
1241 for (auto ZippedInsts : llvm::zip_first(
1242 llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1243 llvm::make_range(ClonedExitBB->begin(),
1244 std::prev(ClonedExitBB->end())))) {
1245 Instruction &I = std::get<0>(ZippedInsts);
1246 Instruction &ClonedI = std::get<1>(ZippedInsts);
1247
1248 // The only instructions in the exit block should be PHI nodes and
1249 // potentially a landing pad.
1250 assert(
1251 (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
1252 "Bad instruction in exit block!");
1253 // We should have a value map between the instruction and its clone.
1254 assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1255
1256 // Forget SCEVs based on exit phis in case SCEV looked through the phi.
1257 if (SE)
1258 if (auto *PN = dyn_cast<PHINode>(&I))
1260
1261 BasicBlock::iterator InsertPt = MergeBB->getFirstInsertionPt();
1262
1263 auto *MergePN =
1264 PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi");
1265 MergePN->insertBefore(InsertPt);
1266 MergePN->setDebugLoc(InsertPt->getDebugLoc());
1267 I.replaceAllUsesWith(MergePN);
1268 MergePN->addIncoming(&I, ExitBB);
1269 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1270 }
1271 }
1272
1273 // Rewrite the instructions in the cloned blocks to refer to the instructions
1274 // in the cloned blocks. We have to do this as a second pass so that we have
1275 // everything available. Also, we have inserted new instructions which may
1276 // include assume intrinsics, so we update the assumption cache while
1277 // processing this.
1278 Module *M = ClonedPH->getParent()->getParent();
1279 for (auto *ClonedBB : NewBlocks)
1280 for (Instruction &I : *ClonedBB) {
1281 RemapDbgRecordRange(M, I.getDbgRecordRange(), VMap,
1283 RemapInstruction(&I, VMap,
1285 if (auto *II = dyn_cast<AssumeInst>(&I))
1287 }
1288
1289 // Update any PHI nodes in the cloned successors of the skipped blocks to not
1290 // have spurious incoming values.
1291 for (auto *LoopBB : L.blocks())
1292 if (SkipBlock(LoopBB))
1293 for (auto *SuccBB : successors(LoopBB))
1294 if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1295 for (PHINode &PN : ClonedSuccBB->phis())
1296 PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1297
1298 // Remove the cloned parent as a predecessor of any successor we ended up
1299 // cloning other than the unswitched one.
1300 auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1301 for (auto *SuccBB : successors(ParentBB)) {
1302 if (SuccBB == UnswitchedSuccBB)
1303 continue;
1304
1305 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1306 if (!ClonedSuccBB)
1307 continue;
1308
1309 ClonedSuccBB->removePredecessor(ClonedParentBB,
1310 /*KeepOneInputPHIs*/ true);
1311 }
1312
1313 // Replace the cloned branch with an unconditional branch to the cloned
1314 // unswitched successor.
1315 auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1316 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1317 // Trivial Simplification. If Terminator is a conditional branch and
1318 // condition becomes dead - erase it.
1319 Value *ClonedConditionToErase = nullptr;
1320 if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1321 ClonedConditionToErase = BI->getCondition();
1322 else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1323 ClonedConditionToErase = SI->getCondition();
1324
1325 Instruction *BI = BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1326 BI->setDebugLoc(ClonedTerminator->getDebugLoc());
1327 ClonedTerminator->eraseFromParent();
1328
1329 if (ClonedConditionToErase)
1330 RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1331 MSSAU);
1332
1333 // If there are duplicate entries in the PHI nodes because of multiple edges
1334 // to the unswitched successor, we need to nuke all but one as we replaced it
1335 // with a direct branch.
1336 for (PHINode &PN : ClonedSuccBB->phis()) {
1337 bool Found = false;
1338 // Loop over the incoming operands backwards so we can easily delete as we
1339 // go without invalidating the index.
1340 for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1341 if (PN.getIncomingBlock(i) != ClonedParentBB)
1342 continue;
1343 if (!Found) {
1344 Found = true;
1345 continue;
1346 }
1347 PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1348 }
1349 }
1350
1351 // Record the domtree updates for the new blocks.
1353 for (auto *ClonedBB : NewBlocks) {
1354 for (auto *SuccBB : successors(ClonedBB))
1355 if (SuccSet.insert(SuccBB).second)
1356 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1357 SuccSet.clear();
1358 }
1359
1360 return ClonedPH;
1361}
1362
1363/// Recursively clone the specified loop and all of its children.
1364///
1365/// The target parent loop for the clone should be provided, or can be null if
1366/// the clone is a top-level loop. While cloning, all the blocks are mapped
1367/// with the provided value map. The entire original loop must be present in
1368/// the value map. The cloned loop is returned.
1369static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1370 const ValueToValueMapTy &VMap, LoopInfo &LI) {
1371 auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1372 assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1373 ClonedL.reserveBlocks(OrigL.getNumBlocks());
1374 for (auto *BB : OrigL.blocks()) {
1375 auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1376 ClonedL.addBlockEntry(ClonedBB);
1377 if (LI.getLoopFor(BB) == &OrigL)
1378 LI.changeLoopFor(ClonedBB, &ClonedL);
1379 }
1380 };
1381
1382 // We specially handle the first loop because it may get cloned into
1383 // a different parent and because we most commonly are cloning leaf loops.
1384 Loop *ClonedRootL = LI.AllocateLoop();
1385 if (RootParentL)
1386 RootParentL->addChildLoop(ClonedRootL);
1387 else
1388 LI.addTopLevelLoop(ClonedRootL);
1389 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1390
1391 if (OrigRootL.isInnermost())
1392 return ClonedRootL;
1393
1394 // If we have a nest, we can quickly clone the entire loop nest using an
1395 // iterative approach because it is a tree. We keep the cloned parent in the
1396 // data structure to avoid repeatedly querying through a map to find it.
1397 SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1398 // Build up the loops to clone in reverse order as we'll clone them from the
1399 // back.
1400 for (Loop *ChildL : llvm::reverse(OrigRootL))
1401 LoopsToClone.push_back({ClonedRootL, ChildL});
1402 do {
1403 Loop *ClonedParentL, *L;
1404 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1405 Loop *ClonedL = LI.AllocateLoop();
1406 ClonedParentL->addChildLoop(ClonedL);
1407 AddClonedBlocksToLoop(*L, *ClonedL);
1408 for (Loop *ChildL : llvm::reverse(*L))
1409 LoopsToClone.push_back({ClonedL, ChildL});
1410 } while (!LoopsToClone.empty());
1411
1412 return ClonedRootL;
1413}
1414
1415/// Build the cloned loops of an original loop from unswitching.
1416///
1417/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1418/// operation. We need to re-verify that there even is a loop (as the backedge
1419/// may not have been cloned), and even if there are remaining backedges the
1420/// backedge set may be different. However, we know that each child loop is
1421/// undisturbed, we only need to find where to place each child loop within
1422/// either any parent loop or within a cloned version of the original loop.
1423///
1424/// Because child loops may end up cloned outside of any cloned version of the
1425/// original loop, multiple cloned sibling loops may be created. All of them
1426/// are returned so that the newly introduced loop nest roots can be
1427/// identified.
1428static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1429 const ValueToValueMapTy &VMap, LoopInfo &LI,
1430 SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1431 Loop *ClonedL = nullptr;
1432
1433 auto *OrigPH = OrigL.getLoopPreheader();
1434 auto *OrigHeader = OrigL.getHeader();
1435
1436 auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1437 auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1438
1439 // We need to know the loops of the cloned exit blocks to even compute the
1440 // accurate parent loop. If we only clone exits to some parent of the
1441 // original parent, we want to clone into that outer loop. We also keep track
1442 // of the loops that our cloned exit blocks participate in.
1443 Loop *ParentL = nullptr;
1444 SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1446 ClonedExitsInLoops.reserve(ExitBlocks.size());
1447 for (auto *ExitBB : ExitBlocks)
1448 if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1449 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1450 ExitLoopMap[ClonedExitBB] = ExitL;
1451 ClonedExitsInLoops.push_back(ClonedExitBB);
1452 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1453 ParentL = ExitL;
1454 }
1455 assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1456 ParentL->contains(OrigL.getParentLoop())) &&
1457 "The computed parent loop should always contain (or be) the parent of "
1458 "the original loop.");
1459
1460 // We build the set of blocks dominated by the cloned header from the set of
1461 // cloned blocks out of the original loop. While not all of these will
1462 // necessarily be in the cloned loop, it is enough to establish that they
1463 // aren't in unreachable cycles, etc.
1464 SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1465 for (auto *BB : OrigL.blocks())
1466 if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1467 ClonedLoopBlocks.insert(ClonedBB);
1468
1469 // Rebuild the set of blocks that will end up in the cloned loop. We may have
1470 // skipped cloning some region of this loop which can in turn skip some of
1471 // the backedges so we have to rebuild the blocks in the loop based on the
1472 // backedges that remain after cloning.
1474 SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1475 for (auto *Pred : predecessors(ClonedHeader)) {
1476 // The only possible non-loop header predecessor is the preheader because
1477 // we know we cloned the loop in simplified form.
1478 if (Pred == ClonedPH)
1479 continue;
1480
1481 // Because the loop was in simplified form, the only non-loop predecessor
1482 // should be the preheader.
1483 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1484 "header other than the preheader "
1485 "that is not part of the loop!");
1486
1487 // Insert this block into the loop set and on the first visit (and if it
1488 // isn't the header we're currently walking) put it into the worklist to
1489 // recurse through.
1490 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1491 Worklist.push_back(Pred);
1492 }
1493
1494 // If we had any backedges then there *is* a cloned loop. Put the header into
1495 // the loop set and then walk the worklist backwards to find all the blocks
1496 // that remain within the loop after cloning.
1497 if (!BlocksInClonedLoop.empty()) {
1498 BlocksInClonedLoop.insert(ClonedHeader);
1499
1500 while (!Worklist.empty()) {
1501 BasicBlock *BB = Worklist.pop_back_val();
1502 assert(BlocksInClonedLoop.count(BB) &&
1503 "Didn't put block into the loop set!");
1504
1505 // Insert any predecessors that are in the possible set into the cloned
1506 // set, and if the insert is successful, add them to the worklist. Note
1507 // that we filter on the blocks that are definitely reachable via the
1508 // backedge to the loop header so we may prune out dead code within the
1509 // cloned loop.
1510 for (auto *Pred : predecessors(BB))
1511 if (ClonedLoopBlocks.count(Pred) &&
1512 BlocksInClonedLoop.insert(Pred).second)
1513 Worklist.push_back(Pred);
1514 }
1515
1516 ClonedL = LI.AllocateLoop();
1517 if (ParentL) {
1518 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1519 ParentL->addChildLoop(ClonedL);
1520 } else {
1521 LI.addTopLevelLoop(ClonedL);
1522 }
1523 NonChildClonedLoops.push_back(ClonedL);
1524
1525 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1526 // We don't want to just add the cloned loop blocks based on how we
1527 // discovered them. The original order of blocks was carefully built in
1528 // a way that doesn't rely on predecessor ordering. Rather than re-invent
1529 // that logic, we just re-walk the original blocks (and those of the child
1530 // loops) and filter them as we add them into the cloned loop.
1531 for (auto *BB : OrigL.blocks()) {
1532 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1533 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1534 continue;
1535
1536 // Directly add the blocks that are only in this loop.
1537 if (LI.getLoopFor(BB) == &OrigL) {
1538 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1539 continue;
1540 }
1541
1542 // We want to manually add it to this loop and parents.
1543 // Registering it with LoopInfo will happen when we clone the top
1544 // loop for this block.
1545 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1546 PL->addBlockEntry(ClonedBB);
1547 }
1548
1549 // Now add each child loop whose header remains within the cloned loop. All
1550 // of the blocks within the loop must satisfy the same constraints as the
1551 // header so once we pass the header checks we can just clone the entire
1552 // child loop nest.
1553 for (Loop *ChildL : OrigL) {
1554 auto *ClonedChildHeader =
1555 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1556 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1557 continue;
1558
1559#ifndef NDEBUG
1560 // We should never have a cloned child loop header but fail to have
1561 // all of the blocks for that child loop.
1562 for (auto *ChildLoopBB : ChildL->blocks())
1563 assert(BlocksInClonedLoop.count(
1564 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1565 "Child cloned loop has a header within the cloned outer "
1566 "loop but not all of its blocks!");
1567#endif
1568
1569 cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1570 }
1571 }
1572
1573 // Now that we've handled all the components of the original loop that were
1574 // cloned into a new loop, we still need to handle anything from the original
1575 // loop that wasn't in a cloned loop.
1576
1577 // Figure out what blocks are left to place within any loop nest containing
1578 // the unswitched loop. If we never formed a loop, the cloned PH is one of
1579 // them.
1580 SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1581 if (BlocksInClonedLoop.empty())
1582 UnloopedBlockSet.insert(ClonedPH);
1583 for (auto *ClonedBB : ClonedLoopBlocks)
1584 if (!BlocksInClonedLoop.count(ClonedBB))
1585 UnloopedBlockSet.insert(ClonedBB);
1586
1587 // Copy the cloned exits and sort them in ascending loop depth, we'll work
1588 // backwards across these to process them inside out. The order shouldn't
1589 // matter as we're just trying to build up the map from inside-out; we use
1590 // the map in a more stably ordered way below.
1591 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1592 llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1593 return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1594 ExitLoopMap.lookup(RHS)->getLoopDepth();
1595 });
1596
1597 // Populate the existing ExitLoopMap with everything reachable from each
1598 // exit, starting from the inner most exit.
1599 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1600 assert(Worklist.empty() && "Didn't clear worklist!");
1601
1602 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1603 Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1604
1605 // Walk the CFG back until we hit the cloned PH adding everything reachable
1606 // and in the unlooped set to this exit block's loop.
1607 Worklist.push_back(ExitBB);
1608 do {
1609 BasicBlock *BB = Worklist.pop_back_val();
1610 // We can stop recursing at the cloned preheader (if we get there).
1611 if (BB == ClonedPH)
1612 continue;
1613
1614 for (BasicBlock *PredBB : predecessors(BB)) {
1615 // If this pred has already been moved to our set or is part of some
1616 // (inner) loop, no update needed.
1617 if (!UnloopedBlockSet.erase(PredBB)) {
1618 assert(
1619 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1620 "Predecessor not mapped to a loop!");
1621 continue;
1622 }
1623
1624 // We just insert into the loop set here. We'll add these blocks to the
1625 // exit loop after we build up the set in an order that doesn't rely on
1626 // predecessor order (which in turn relies on use list order).
1627 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1628 (void)Inserted;
1629 assert(Inserted && "Should only visit an unlooped block once!");
1630
1631 // And recurse through to its predecessors.
1632 Worklist.push_back(PredBB);
1633 }
1634 } while (!Worklist.empty());
1635 }
1636
1637 // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1638 // blocks to their outer loops, walk the cloned blocks and the cloned exits
1639 // in their original order adding them to the correct loop.
1640
1641 // We need a stable insertion order. We use the order of the original loop
1642 // order and map into the correct parent loop.
1643 for (auto *BB : llvm::concat<BasicBlock *const>(
1644 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1645 if (Loop *OuterL = ExitLoopMap.lookup(BB))
1646 OuterL->addBasicBlockToLoop(BB, LI);
1647
1648#ifndef NDEBUG
1649 for (auto &BBAndL : ExitLoopMap) {
1650 auto *BB = BBAndL.first;
1651 auto *OuterL = BBAndL.second;
1652 assert(LI.getLoopFor(BB) == OuterL &&
1653 "Failed to put all blocks into outer loops!");
1654 }
1655#endif
1656
1657 // Now that all the blocks are placed into the correct containing loop in the
1658 // absence of child loops, find all the potentially cloned child loops and
1659 // clone them into whatever outer loop we placed their header into.
1660 for (Loop *ChildL : OrigL) {
1661 auto *ClonedChildHeader =
1662 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1663 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1664 continue;
1665
1666#ifndef NDEBUG
1667 for (auto *ChildLoopBB : ChildL->blocks())
1668 assert(VMap.count(ChildLoopBB) &&
1669 "Cloned a child loop header but not all of that loops blocks!");
1670#endif
1671
1672 NonChildClonedLoops.push_back(cloneLoopNest(
1673 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1674 }
1675}
1676
1677static void
1679 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1680 DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1681 // Find all the dead clones, and remove them from their successors.
1683 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1684 for (const auto &VMap : VMaps)
1685 if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1686 if (!DT.isReachableFromEntry(ClonedBB)) {
1687 for (BasicBlock *SuccBB : successors(ClonedBB))
1688 SuccBB->removePredecessor(ClonedBB);
1689 DeadBlocks.push_back(ClonedBB);
1690 }
1691
1692 // Remove all MemorySSA in the dead blocks
1693 if (MSSAU) {
1694 SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1695 DeadBlocks.end());
1696 MSSAU->removeBlocks(DeadBlockSet);
1697 }
1698
1699 // Drop any remaining references to break cycles.
1700 for (BasicBlock *BB : DeadBlocks)
1701 BB->dropAllReferences();
1702 // Erase them from the IR.
1703 for (BasicBlock *BB : DeadBlocks)
1704 BB->eraseFromParent();
1705}
1706
1709 DominatorTree &DT, LoopInfo &LI,
1710 MemorySSAUpdater *MSSAU,
1711 ScalarEvolution *SE,
1712 LPMUpdater &LoopUpdater) {
1713 // Find all the dead blocks tied to this loop, and remove them from their
1714 // successors.
1716
1717 // Start with loop/exit blocks and get a transitive closure of reachable dead
1718 // blocks.
1719 SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1720 ExitBlocks.end());
1721 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1722 while (!DeathCandidates.empty()) {
1723 auto *BB = DeathCandidates.pop_back_val();
1724 if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1725 for (BasicBlock *SuccBB : successors(BB)) {
1726 SuccBB->removePredecessor(BB);
1727 DeathCandidates.push_back(SuccBB);
1728 }
1729 DeadBlockSet.insert(BB);
1730 }
1731 }
1732
1733 // Remove all MemorySSA in the dead blocks
1734 if (MSSAU)
1735 MSSAU->removeBlocks(DeadBlockSet);
1736
1737 // Filter out the dead blocks from the exit blocks list so that it can be
1738 // used in the caller.
1739 llvm::erase_if(ExitBlocks,
1740 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1741
1742 // Walk from this loop up through its parents removing all of the dead blocks.
1743 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1744 for (auto *BB : DeadBlockSet)
1745 ParentL->getBlocksSet().erase(BB);
1746 llvm::erase_if(ParentL->getBlocksVector(),
1747 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1748 }
1749
1750 // Now delete the dead child loops. This raw delete will clear them
1751 // recursively.
1752 llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1753 if (!DeadBlockSet.count(ChildL->getHeader()))
1754 return false;
1755
1756 assert(llvm::all_of(ChildL->blocks(),
1757 [&](BasicBlock *ChildBB) {
1758 return DeadBlockSet.count(ChildBB);
1759 }) &&
1760 "If the child loop header is dead all blocks in the child loop must "
1761 "be dead as well!");
1762 LoopUpdater.markLoopAsDeleted(*ChildL, ChildL->getName());
1763 if (SE)
1765 LI.destroy(ChildL);
1766 return true;
1767 });
1768
1769 // Remove the loop mappings for the dead blocks and drop all the references
1770 // from these blocks to others to handle cyclic references as we start
1771 // deleting the blocks themselves.
1772 for (auto *BB : DeadBlockSet) {
1773 // Check that the dominator tree has already been updated.
1774 assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1775 LI.changeLoopFor(BB, nullptr);
1776 // Drop all uses of the instructions to make sure we won't have dangling
1777 // uses in other blocks.
1778 for (auto &I : *BB)
1779 if (!I.use_empty())
1780 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
1781 BB->dropAllReferences();
1782 }
1783
1784 // Actually delete the blocks now that they've been fully unhooked from the
1785 // IR.
1786 for (auto *BB : DeadBlockSet)
1787 BB->eraseFromParent();
1788}
1789
1790/// Recompute the set of blocks in a loop after unswitching.
1791///
1792/// This walks from the original headers predecessors to rebuild the loop. We
1793/// take advantage of the fact that new blocks can't have been added, and so we
1794/// filter by the original loop's blocks. This also handles potentially
1795/// unreachable code that we don't want to explore but might be found examining
1796/// the predecessors of the header.
1797///
1798/// If the original loop is no longer a loop, this will return an empty set. If
1799/// it remains a loop, all the blocks within it will be added to the set
1800/// (including those blocks in inner loops).
1802 LoopInfo &LI) {
1804
1805 auto *PH = L.getLoopPreheader();
1806 auto *Header = L.getHeader();
1807
1808 // A worklist to use while walking backwards from the header.
1810
1811 // First walk the predecessors of the header to find the backedges. This will
1812 // form the basis of our walk.
1813 for (auto *Pred : predecessors(Header)) {
1814 // Skip the preheader.
1815 if (Pred == PH)
1816 continue;
1817
1818 // Because the loop was in simplified form, the only non-loop predecessor
1819 // is the preheader.
1820 assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1821 "than the preheader that is not part of the "
1822 "loop!");
1823
1824 // Insert this block into the loop set and on the first visit and, if it
1825 // isn't the header we're currently walking, put it into the worklist to
1826 // recurse through.
1827 if (LoopBlockSet.insert(Pred).second && Pred != Header)
1828 Worklist.push_back(Pred);
1829 }
1830
1831 // If no backedges were found, we're done.
1832 if (LoopBlockSet.empty())
1833 return LoopBlockSet;
1834
1835 // We found backedges, recurse through them to identify the loop blocks.
1836 while (!Worklist.empty()) {
1837 BasicBlock *BB = Worklist.pop_back_val();
1838 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1839
1840 // No need to walk past the header.
1841 if (BB == Header)
1842 continue;
1843
1844 // Because we know the inner loop structure remains valid we can use the
1845 // loop structure to jump immediately across the entire nested loop.
1846 // Further, because it is in loop simplified form, we can directly jump
1847 // to its preheader afterward.
1848 if (Loop *InnerL = LI.getLoopFor(BB))
1849 if (InnerL != &L) {
1850 assert(L.contains(InnerL) &&
1851 "Should not reach a loop *outside* this loop!");
1852 // The preheader is the only possible predecessor of the loop so
1853 // insert it into the set and check whether it was already handled.
1854 auto *InnerPH = InnerL->getLoopPreheader();
1855 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1856 "but not contain the inner loop "
1857 "preheader!");
1858 if (!LoopBlockSet.insert(InnerPH).second)
1859 // The only way to reach the preheader is through the loop body
1860 // itself so if it has been visited the loop is already handled.
1861 continue;
1862
1863 // Insert all of the blocks (other than those already present) into
1864 // the loop set. We expect at least the block that led us to find the
1865 // inner loop to be in the block set, but we may also have other loop
1866 // blocks if they were already enqueued as predecessors of some other
1867 // outer loop block.
1868 for (auto *InnerBB : InnerL->blocks()) {
1869 if (InnerBB == BB) {
1870 assert(LoopBlockSet.count(InnerBB) &&
1871 "Block should already be in the set!");
1872 continue;
1873 }
1874
1875 LoopBlockSet.insert(InnerBB);
1876 }
1877
1878 // Add the preheader to the worklist so we will continue past the
1879 // loop body.
1880 Worklist.push_back(InnerPH);
1881 continue;
1882 }
1883
1884 // Insert any predecessors that were in the original loop into the new
1885 // set, and if the insert is successful, add them to the worklist.
1886 for (auto *Pred : predecessors(BB))
1887 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1888 Worklist.push_back(Pred);
1889 }
1890
1891 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1892
1893 // We've found all the blocks participating in the loop, return our completed
1894 // set.
1895 return LoopBlockSet;
1896}
1897
1898/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1899///
1900/// The removal may have removed some child loops entirely but cannot have
1901/// disturbed any remaining child loops. However, they may need to be hoisted
1902/// to the parent loop (or to be top-level loops). The original loop may be
1903/// completely removed.
1904///
1905/// The sibling loops resulting from this update are returned. If the original
1906/// loop remains a valid loop, it will be the first entry in this list with all
1907/// of the newly sibling loops following it.
1908///
1909/// Returns true if the loop remains a loop after unswitching, and false if it
1910/// is no longer a loop after unswitching (and should not continue to be
1911/// referenced).
1913 LoopInfo &LI,
1914 SmallVectorImpl<Loop *> &HoistedLoops,
1915 ScalarEvolution *SE) {
1916 auto *PH = L.getLoopPreheader();
1917
1918 // Compute the actual parent loop from the exit blocks. Because we may have
1919 // pruned some exits the loop may be different from the original parent.
1920 Loop *ParentL = nullptr;
1921 SmallVector<Loop *, 4> ExitLoops;
1922 SmallVector<BasicBlock *, 4> ExitsInLoops;
1923 ExitsInLoops.reserve(ExitBlocks.size());
1924 for (auto *ExitBB : ExitBlocks)
1925 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1926 ExitLoops.push_back(ExitL);
1927 ExitsInLoops.push_back(ExitBB);
1928 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1929 ParentL = ExitL;
1930 }
1931
1932 // Recompute the blocks participating in this loop. This may be empty if it
1933 // is no longer a loop.
1934 auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1935
1936 // If we still have a loop, we need to re-set the loop's parent as the exit
1937 // block set changing may have moved it within the loop nest. Note that this
1938 // can only happen when this loop has a parent as it can only hoist the loop
1939 // *up* the nest.
1940 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1941 // Remove this loop's (original) blocks from all of the intervening loops.
1942 for (Loop *IL = L.getParentLoop(); IL != ParentL;
1943 IL = IL->getParentLoop()) {
1944 IL->getBlocksSet().erase(PH);
1945 for (auto *BB : L.blocks())
1946 IL->getBlocksSet().erase(BB);
1947 llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1948 return BB == PH || L.contains(BB);
1949 });
1950 }
1951
1952 LI.changeLoopFor(PH, ParentL);
1953 L.getParentLoop()->removeChildLoop(&L);
1954 if (ParentL)
1955 ParentL->addChildLoop(&L);
1956 else
1957 LI.addTopLevelLoop(&L);
1958 }
1959
1960 // Now we update all the blocks which are no longer within the loop.
1961 auto &Blocks = L.getBlocksVector();
1962 auto BlocksSplitI =
1963 LoopBlockSet.empty()
1964 ? Blocks.begin()
1965 : std::stable_partition(
1966 Blocks.begin(), Blocks.end(),
1967 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1968
1969 // Before we erase the list of unlooped blocks, build a set of them.
1970 SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1971 if (LoopBlockSet.empty())
1972 UnloopedBlocks.insert(PH);
1973
1974 // Now erase these blocks from the loop.
1975 for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1976 L.getBlocksSet().erase(BB);
1977 Blocks.erase(BlocksSplitI, Blocks.end());
1978
1979 // Sort the exits in ascending loop depth, we'll work backwards across these
1980 // to process them inside out.
1981 llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1982 return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1983 });
1984
1985 // We'll build up a set for each exit loop.
1986 SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1987 Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1988
1989 auto RemoveUnloopedBlocksFromLoop =
1990 [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1991 for (auto *BB : UnloopedBlocks)
1992 L.getBlocksSet().erase(BB);
1993 llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1994 return UnloopedBlocks.count(BB);
1995 });
1996 };
1997
1999 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
2000 assert(Worklist.empty() && "Didn't clear worklist!");
2001 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
2002
2003 // Grab the next exit block, in decreasing loop depth order.
2004 BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
2005 Loop &ExitL = *LI.getLoopFor(ExitBB);
2006 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
2007
2008 // Erase all of the unlooped blocks from the loops between the previous
2009 // exit loop and this exit loop. This works because the ExitInLoops list is
2010 // sorted in increasing order of loop depth and thus we visit loops in
2011 // decreasing order of loop depth.
2012 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
2013 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2014
2015 // Walk the CFG back until we hit the cloned PH adding everything reachable
2016 // and in the unlooped set to this exit block's loop.
2017 Worklist.push_back(ExitBB);
2018 do {
2019 BasicBlock *BB = Worklist.pop_back_val();
2020 // We can stop recursing at the cloned preheader (if we get there).
2021 if (BB == PH)
2022 continue;
2023
2024 for (BasicBlock *PredBB : predecessors(BB)) {
2025 // If this pred has already been moved to our set or is part of some
2026 // (inner) loop, no update needed.
2027 if (!UnloopedBlocks.erase(PredBB)) {
2028 assert((NewExitLoopBlocks.count(PredBB) ||
2029 ExitL.contains(LI.getLoopFor(PredBB))) &&
2030 "Predecessor not in a nested loop (or already visited)!");
2031 continue;
2032 }
2033
2034 // We just insert into the loop set here. We'll add these blocks to the
2035 // exit loop after we build up the set in a deterministic order rather
2036 // than the predecessor-influenced visit order.
2037 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2038 (void)Inserted;
2039 assert(Inserted && "Should only visit an unlooped block once!");
2040
2041 // And recurse through to its predecessors.
2042 Worklist.push_back(PredBB);
2043 }
2044 } while (!Worklist.empty());
2045
2046 // If blocks in this exit loop were directly part of the original loop (as
2047 // opposed to a child loop) update the map to point to this exit loop. This
2048 // just updates a map and so the fact that the order is unstable is fine.
2049 for (auto *BB : NewExitLoopBlocks)
2050 if (Loop *BBL = LI.getLoopFor(BB))
2051 if (BBL == &L || !L.contains(BBL))
2052 LI.changeLoopFor(BB, &ExitL);
2053
2054 // We will remove the remaining unlooped blocks from this loop in the next
2055 // iteration or below.
2056 NewExitLoopBlocks.clear();
2057 }
2058
2059 // Any remaining unlooped blocks are no longer part of any loop unless they
2060 // are part of some child loop.
2061 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
2062 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2063 for (auto *BB : UnloopedBlocks)
2064 if (Loop *BBL = LI.getLoopFor(BB))
2065 if (BBL == &L || !L.contains(BBL))
2066 LI.changeLoopFor(BB, nullptr);
2067
2068 // Sink all the child loops whose headers are no longer in the loop set to
2069 // the parent (or to be top level loops). We reach into the loop and directly
2070 // update its subloop vector to make this batch update efficient.
2071 auto &SubLoops = L.getSubLoopsVector();
2072 auto SubLoopsSplitI =
2073 LoopBlockSet.empty()
2074 ? SubLoops.begin()
2075 : std::stable_partition(
2076 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
2077 return LoopBlockSet.count(SubL->getHeader());
2078 });
2079 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
2080 HoistedLoops.push_back(HoistedL);
2081 HoistedL->setParentLoop(nullptr);
2082
2083 // To compute the new parent of this hoisted loop we look at where we
2084 // placed the preheader above. We can't lookup the header itself because we
2085 // retained the mapping from the header to the hoisted loop. But the
2086 // preheader and header should have the exact same new parent computed
2087 // based on the set of exit blocks from the original loop as the preheader
2088 // is a predecessor of the header and so reached in the reverse walk. And
2089 // because the loops were all in simplified form the preheader of the
2090 // hoisted loop can't be part of some *other* loop.
2091 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
2092 NewParentL->addChildLoop(HoistedL);
2093 else
2094 LI.addTopLevelLoop(HoistedL);
2095 }
2096 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2097
2098 // Actually delete the loop if nothing remained within it.
2099 if (Blocks.empty()) {
2100 assert(SubLoops.empty() &&
2101 "Failed to remove all subloops from the original loop!");
2102 if (Loop *ParentL = L.getParentLoop())
2103 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2104 else
2105 LI.removeLoop(llvm::find(LI, &L));
2106 // markLoopAsDeleted for L should be triggered by the caller (it is
2107 // typically done within postUnswitch).
2108 if (SE)
2110 LI.destroy(&L);
2111 return false;
2112 }
2113
2114 return true;
2115}
2116
2117/// Helper to visit a dominator subtree, invoking a callable on each node.
2118///
2119/// Returning false at any point will stop walking past that node of the tree.
2120template <typename CallableT>
2121void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2123 DomWorklist.push_back(DT[BB]);
2124#ifndef NDEBUG
2126 Visited.insert(DT[BB]);
2127#endif
2128 do {
2129 DomTreeNode *N = DomWorklist.pop_back_val();
2130
2131 // Visit this node.
2132 if (!Callable(N->getBlock()))
2133 continue;
2134
2135 // Accumulate the child nodes.
2136 for (DomTreeNode *ChildN : *N) {
2137 assert(Visited.insert(ChildN).second &&
2138 "Cannot visit a node twice when walking a tree!");
2139 DomWorklist.push_back(ChildN);
2140 }
2141 } while (!DomWorklist.empty());
2142}
2143
2145 bool CurrentLoopValid, bool PartiallyInvariant,
2146 bool InjectedCondition, ArrayRef<Loop *> NewLoops) {
2147 // If we did a non-trivial unswitch, we have added new (cloned) loops.
2148 if (!NewLoops.empty())
2149 U.addSiblingLoops(NewLoops);
2150
2151 // If the current loop remains valid, we should revisit it to catch any
2152 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
2153 if (CurrentLoopValid) {
2154 if (PartiallyInvariant) {
2155 // Mark the new loop as partially unswitched, to avoid unswitching on
2156 // the same condition again.
2157 auto &Context = L.getHeader()->getContext();
2158 MDNode *DisableUnswitchMD = MDNode::get(
2159 Context,
2160 MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
2162 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
2163 {DisableUnswitchMD});
2164 L.setLoopID(NewLoopID);
2165 } else if (InjectedCondition) {
2166 // Do the same for injection of invariant conditions.
2167 auto &Context = L.getHeader()->getContext();
2168 MDNode *DisableUnswitchMD = MDNode::get(
2169 Context,
2170 MDString::get(Context, "llvm.loop.unswitch.injection.disable"));
2172 Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},
2173 {DisableUnswitchMD});
2174 L.setLoopID(NewLoopID);
2175 } else
2176 U.revisitCurrentLoop();
2177 } else
2178 U.markLoopAsDeleted(L, LoopName);
2179}
2180
2182 Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2183 IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
2185 LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition) {
2186 auto *ParentBB = TI.getParent();
2187 BranchInst *BI = dyn_cast<BranchInst>(&TI);
2188 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2189
2190 // Save the current loop name in a variable so that we can report it even
2191 // after it has been deleted.
2192 std::string LoopName(L.getName());
2193
2194 // We can only unswitch switches, conditional branches with an invariant
2195 // condition, or combining invariant conditions with an instruction or
2196 // partially invariant instructions.
2197 assert((SI || (BI && BI->isConditional())) &&
2198 "Can only unswitch switches and conditional branch!");
2199 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2200 bool FullUnswitch =
2201 SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2202 !PartiallyInvariant);
2203 if (FullUnswitch)
2204 assert(Invariants.size() == 1 &&
2205 "Cannot have other invariants with full unswitching!");
2206 else
2207 assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) &&
2208 "Partial unswitching requires an instruction as the condition!");
2209
2210 if (MSSAU && VerifyMemorySSA)
2211 MSSAU->getMemorySSA()->verifyMemorySSA();
2212
2213 // Constant and BBs tracking the cloned and continuing successor. When we are
2214 // unswitching the entire condition, this can just be trivially chosen to
2215 // unswitch towards `true`. However, when we are unswitching a set of
2216 // invariants combined with `and` or `or` or partially invariant instructions,
2217 // the combining operation determines the best direction to unswitch: we want
2218 // to unswitch the direction that will collapse the branch.
2219 bool Direction = true;
2220 int ClonedSucc = 0;
2221 if (!FullUnswitch) {
2223 (void)Cond;
2225 PartiallyInvariant) &&
2226 "Only `or`, `and`, an `select`, partially invariant instructions "
2227 "can combine invariants being unswitched.");
2228 if (!match(Cond, m_LogicalOr())) {
2229 if (match(Cond, m_LogicalAnd()) ||
2230 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2231 Direction = false;
2232 ClonedSucc = 1;
2233 }
2234 }
2235 }
2236
2237 BasicBlock *RetainedSuccBB =
2238 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2239 SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2240 if (BI)
2241 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2242 else
2243 for (auto Case : SI->cases())
2244 if (Case.getCaseSuccessor() != RetainedSuccBB)
2245 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2246
2247 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2248 "Should not unswitch the same successor we are retaining!");
2249
2250 // The branch should be in this exact loop. Any inner loop's invariant branch
2251 // should be handled by unswitching that inner loop. The caller of this
2252 // routine should filter out any candidates that remain (but were skipped for
2253 // whatever reason).
2254 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2255
2256 // Compute the parent loop now before we start hacking on things.
2257 Loop *ParentL = L.getParentLoop();
2258 // Get blocks in RPO order for MSSA update, before changing the CFG.
2259 LoopBlocksRPO LBRPO(&L);
2260 if (MSSAU)
2261 LBRPO.perform(&LI);
2262
2263 // Compute the outer-most loop containing one of our exit blocks. This is the
2264 // furthest up our loopnest which can be mutated, which we will use below to
2265 // update things.
2266 Loop *OuterExitL = &L;
2268 L.getUniqueExitBlocks(ExitBlocks);
2269 for (auto *ExitBB : ExitBlocks) {
2270 // ExitBB can be an exit block for several levels in the loop nest. Make
2271 // sure we find the top most.
2272 Loop *NewOuterExitL = getTopMostExitingLoop(ExitBB, LI);
2273 if (!NewOuterExitL) {
2274 // We exited the entire nest with this block, so we're done.
2275 OuterExitL = nullptr;
2276 break;
2277 }
2278 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2279 OuterExitL = NewOuterExitL;
2280 }
2281
2282 // At this point, we're definitely going to unswitch something so invalidate
2283 // any cached information in ScalarEvolution for the outer most loop
2284 // containing an exit block and all nested loops.
2285 if (SE) {
2286 if (OuterExitL)
2287 SE->forgetLoop(OuterExitL);
2288 else
2289 SE->forgetTopmostLoop(&L);
2291 }
2292
2293 // If the edge from this terminator to a successor dominates that successor,
2294 // store a map from each block in its dominator subtree to it. This lets us
2295 // tell when cloning for a particular successor if a block is dominated by
2296 // some *other* successor with a single data structure. We use this to
2297 // significantly reduce cloning.
2299 for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),
2300 UnswitchedSuccBBs))
2301 if (SuccBB->getUniquePredecessor() ||
2302 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2303 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2304 }))
2305 visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2306 DominatingSucc[BB] = SuccBB;
2307 return true;
2308 });
2309
2310 // Split the preheader, so that we know that there is a safe place to insert
2311 // the conditional branch. We will change the preheader to have a conditional
2312 // branch on LoopCond. The original preheader will become the split point
2313 // between the unswitched versions, and we will have a new preheader for the
2314 // original loop.
2315 BasicBlock *SplitBB = L.getLoopPreheader();
2316 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2317
2318 // Keep track of the dominator tree updates needed.
2320
2321 // Clone the loop for each unswitched successor.
2323 VMaps.reserve(UnswitchedSuccBBs.size());
2325 for (auto *SuccBB : UnswitchedSuccBBs) {
2326 VMaps.emplace_back(new ValueToValueMapTy());
2327 ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2328 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2329 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2330 }
2331
2332 // Drop metadata if we may break its semantics by moving this instr into the
2333 // split block.
2334 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2336 // Do not spend time trying to understand if we can keep it, just drop it
2337 // to save compile time.
2338 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2339 else {
2340 // It is only legal to preserve make.implicit metadata if we are
2341 // guaranteed no reach implicit null check after following this branch.
2342 ICFLoopSafetyInfo SafetyInfo;
2343 SafetyInfo.computeLoopSafetyInfo(&L);
2344 if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2345 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2346 }
2347 }
2348
2349 // The stitching of the branched code back together depends on whether we're
2350 // doing full unswitching or not with the exception that we always want to
2351 // nuke the initial terminator placed in the split block.
2352 SplitBB->getTerminator()->eraseFromParent();
2353 if (FullUnswitch) {
2354 // Keep a clone of the terminator for MSSA updates.
2355 Instruction *NewTI = TI.clone();
2356 NewTI->insertInto(ParentBB, ParentBB->end());
2357
2358 // Splice the terminator from the original loop and rewrite its
2359 // successors.
2360 TI.moveBefore(*SplitBB, SplitBB->end());
2361 TI.dropLocation();
2362
2363 // First wire up the moved terminator to the preheaders.
2364 if (BI) {
2365 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2366 BI->setSuccessor(ClonedSucc, ClonedPH);
2367 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2369 if (InsertFreeze) {
2370 // We don't give any debug location to the new freeze, because the
2371 // BI (`dyn_cast<BranchInst>(TI)`) is an in-loop instruction hoisted
2372 // out of the loop.
2373 Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI->getIterator());
2374 cast<Instruction>(Cond)->setDebugLoc(DebugLoc::getDropped());
2375 }
2376 BI->setCondition(Cond);
2377 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2378 } else {
2379 assert(SI && "Must either be a branch or switch!");
2380
2381 // Walk the cases and directly update their successors.
2382 assert(SI->getDefaultDest() == RetainedSuccBB &&
2383 "Not retaining default successor!");
2384 SI->setDefaultDest(LoopPH);
2385 for (const auto &Case : SI->cases())
2386 if (Case.getCaseSuccessor() == RetainedSuccBB)
2387 Case.setSuccessor(LoopPH);
2388 else
2389 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2390
2391 if (InsertFreeze)
2392 SI->setCondition(new FreezeInst(SI->getCondition(),
2393 SI->getCondition()->getName() + ".fr",
2394 SI->getIterator()));
2395
2396 // We need to use the set to populate domtree updates as even when there
2397 // are multiple cases pointing at the same successor we only want to
2398 // remove and insert one edge in the domtree.
2399 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2400 DTUpdates.push_back(
2401 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2402 }
2403
2404 if (MSSAU) {
2405 DT.applyUpdates(DTUpdates);
2406 DTUpdates.clear();
2407
2408 // Remove all but one edge to the retained block and all unswitched
2409 // blocks. This is to avoid having duplicate entries in the cloned Phis,
2410 // when we know we only keep a single edge for each case.
2411 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2412 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2413 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2414
2415 for (auto &VMap : VMaps)
2416 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2417 /*IgnoreIncomingWithNoClones=*/true);
2418 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2419
2420 // Remove all edges to unswitched blocks.
2421 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2422 MSSAU->removeEdge(ParentBB, SuccBB);
2423 }
2424
2425 // Now unhook the successor relationship as we'll be replacing
2426 // the terminator with a direct branch. This is much simpler for branches
2427 // than switches so we handle those first.
2428 if (BI) {
2429 // Remove the parent as a predecessor of the unswitched successor.
2430 assert(UnswitchedSuccBBs.size() == 1 &&
2431 "Only one possible unswitched block for a branch!");
2432 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2433 UnswitchedSuccBB->removePredecessor(ParentBB,
2434 /*KeepOneInputPHIs*/ true);
2435 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2436 } else {
2437 // Note that we actually want to remove the parent block as a predecessor
2438 // of *every* case successor. The case successor is either unswitched,
2439 // completely eliminating an edge from the parent to that successor, or it
2440 // is a duplicate edge to the retained successor as the retained successor
2441 // is always the default successor and as we'll replace this with a direct
2442 // branch we no longer need the duplicate entries in the PHI nodes.
2443 SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2444 assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2445 "Not retaining default successor!");
2446 for (const auto &Case : NewSI->cases())
2447 Case.getCaseSuccessor()->removePredecessor(
2448 ParentBB,
2449 /*KeepOneInputPHIs*/ true);
2450
2451 // We need to use the set to populate domtree updates as even when there
2452 // are multiple cases pointing at the same successor we only want to
2453 // remove and insert one edge in the domtree.
2454 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2455 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2456 }
2457
2458 // Create a new unconditional branch to the continuing block (as opposed to
2459 // the one cloned).
2460 Instruction *NewBI = BranchInst::Create(RetainedSuccBB, ParentBB);
2461 NewBI->setDebugLoc(NewTI->getDebugLoc());
2462
2463 // After MSSAU update, remove the cloned terminator instruction NewTI.
2464 NewTI->eraseFromParent();
2465 } else {
2466 assert(BI && "Only branches have partial unswitching.");
2467 assert(UnswitchedSuccBBs.size() == 1 &&
2468 "Only one possible unswitched block for a branch!");
2469 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2470 // When doing a partial unswitch, we have to do a bit more work to build up
2471 // the branch in the split block.
2472 if (PartiallyInvariant)
2474 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2475 else {
2477 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2478 FreezeLoopUnswitchCond, BI, &AC, DT);
2479 }
2480 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2481
2482 if (MSSAU) {
2483 DT.applyUpdates(DTUpdates);
2484 DTUpdates.clear();
2485
2486 // Perform MSSA cloning updates.
2487 for (auto &VMap : VMaps)
2488 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2489 /*IgnoreIncomingWithNoClones=*/true);
2490 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2491 }
2492 }
2493
2494 // Apply the updates accumulated above to get an up-to-date dominator tree.
2495 DT.applyUpdates(DTUpdates);
2496
2497 // Now that we have an accurate dominator tree, first delete the dead cloned
2498 // blocks so that we can accurately build any cloned loops. It is important to
2499 // not delete the blocks from the original loop yet because we still want to
2500 // reference the original loop to understand the cloned loop's structure.
2501 deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2502
2503 // Build the cloned loop structure itself. This may be substantially
2504 // different from the original structure due to the simplified CFG. This also
2505 // handles inserting all the cloned blocks into the correct loops.
2506 SmallVector<Loop *, 4> NonChildClonedLoops;
2507 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2508 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2509
2510 // Now that our cloned loops have been built, we can update the original loop.
2511 // First we delete the dead blocks from it and then we rebuild the loop
2512 // structure taking these deletions into account.
2513 deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE, LoopUpdater);
2514
2515 if (MSSAU && VerifyMemorySSA)
2516 MSSAU->getMemorySSA()->verifyMemorySSA();
2517
2518 SmallVector<Loop *, 4> HoistedLoops;
2519 bool IsStillLoop =
2520 rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
2521
2522 if (MSSAU && VerifyMemorySSA)
2523 MSSAU->getMemorySSA()->verifyMemorySSA();
2524
2525 // This transformation has a high risk of corrupting the dominator tree, and
2526 // the below steps to rebuild loop structures will result in hard to debug
2527 // errors in that case so verify that the dominator tree is sane first.
2528 // FIXME: Remove this when the bugs stop showing up and rely on existing
2529 // verification steps.
2530 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2531
2532 if (BI && !PartiallyInvariant) {
2533 // If we unswitched a branch which collapses the condition to a known
2534 // constant we want to replace all the uses of the invariants within both
2535 // the original and cloned blocks. We do this here so that we can use the
2536 // now updated dominator tree to identify which side the users are on.
2537 assert(UnswitchedSuccBBs.size() == 1 &&
2538 "Only one possible unswitched block for a branch!");
2539 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2540
2541 // When considering multiple partially-unswitched invariants
2542 // we cant just go replace them with constants in both branches.
2543 //
2544 // For 'AND' we infer that true branch ("continue") means true
2545 // for each invariant operand.
2546 // For 'OR' we can infer that false branch ("continue") means false
2547 // for each invariant operand.
2548 // So it happens that for multiple-partial case we dont replace
2549 // in the unswitched branch.
2550 bool ReplaceUnswitched =
2551 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2552
2553 ConstantInt *UnswitchedReplacement =
2556 ConstantInt *ContinueReplacement =
2559 for (Value *Invariant : Invariants) {
2560 assert(!isa<Constant>(Invariant) &&
2561 "Should not be replacing constant values!");
2562 // Use make_early_inc_range here as set invalidates the iterator.
2563 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2564 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2565 if (!UserI)
2566 continue;
2567
2568 // Replace it with the 'continue' side if in the main loop body, and the
2569 // unswitched if in the cloned blocks.
2570 if (DT.dominates(LoopPH, UserI->getParent()))
2571 U.set(ContinueReplacement);
2572 else if (ReplaceUnswitched &&
2573 DT.dominates(ClonedPH, UserI->getParent()))
2574 U.set(UnswitchedReplacement);
2575 }
2576 }
2577 }
2578
2579 // We can change which blocks are exit blocks of all the cloned sibling
2580 // loops, the current loop, and any parent loops which shared exit blocks
2581 // with the current loop. As a consequence, we need to re-form LCSSA for
2582 // them. But we shouldn't need to re-form LCSSA for any child loops.
2583 // FIXME: This could be made more efficient by tracking which exit blocks are
2584 // new, and focusing on them, but that isn't likely to be necessary.
2585 //
2586 // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2587 // loop nest and update every loop that could have had its exits changed. We
2588 // also need to cover any intervening loops. We add all of these loops to
2589 // a list and sort them by loop depth to achieve this without updating
2590 // unnecessary loops.
2591 auto UpdateLoop = [&](Loop &UpdateL) {
2592#ifndef NDEBUG
2593 UpdateL.verifyLoop();
2594 for (Loop *ChildL : UpdateL) {
2595 ChildL->verifyLoop();
2596 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2597 "Perturbed a child loop's LCSSA form!");
2598 }
2599#endif
2600 // First build LCSSA for this loop so that we can preserve it when
2601 // forming dedicated exits. We don't want to perturb some other loop's
2602 // LCSSA while doing that CFG edit.
2603 formLCSSA(UpdateL, DT, &LI, SE);
2604
2605 // For loops reached by this loop's original exit blocks we may
2606 // introduced new, non-dedicated exits. At least try to re-form dedicated
2607 // exits for these loops. This may fail if they couldn't have dedicated
2608 // exits to start with.
2609 formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2610 };
2611
2612 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2613 // and we can do it in any order as they don't nest relative to each other.
2614 //
2615 // Also check if any of the loops we have updated have become top-level loops
2616 // as that will necessitate widening the outer loop scope.
2617 for (Loop *UpdatedL :
2618 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2619 UpdateLoop(*UpdatedL);
2620 if (UpdatedL->isOutermost())
2621 OuterExitL = nullptr;
2622 }
2623 if (IsStillLoop) {
2624 UpdateLoop(L);
2625 if (L.isOutermost())
2626 OuterExitL = nullptr;
2627 }
2628
2629 // If the original loop had exit blocks, walk up through the outer most loop
2630 // of those exit blocks to update LCSSA and form updated dedicated exits.
2631 if (OuterExitL != &L)
2632 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2633 OuterL = OuterL->getParentLoop())
2634 UpdateLoop(*OuterL);
2635
2636#ifndef NDEBUG
2637 // Verify the entire loop structure to catch any incorrect updates before we
2638 // progress in the pass pipeline.
2639 LI.verify(DT);
2640#endif
2641
2642 // Now that we've unswitched something, make callbacks to report the changes.
2643 // For that we need to merge together the updated loops and the cloned loops
2644 // and check whether the original loop survived.
2645 SmallVector<Loop *, 4> SibLoops;
2646 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2647 if (UpdatedL->getParentLoop() == ParentL)
2648 SibLoops.push_back(UpdatedL);
2649 postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,
2650 InjectedCondition, SibLoops);
2651
2652 if (MSSAU && VerifyMemorySSA)
2653 MSSAU->getMemorySSA()->verifyMemorySSA();
2654
2655 if (BI)
2656 ++NumBranches;
2657 else
2658 ++NumSwitches;
2659}
2660
2661/// Recursively compute the cost of a dominator subtree based on the per-block
2662/// cost map provided.
2663///
2664/// The recursive computation is memozied into the provided DT-indexed cost map
2665/// to allow querying it for most nodes in the domtree without it becoming
2666/// quadratic.
2668 DomTreeNode &N,
2671 // Don't accumulate cost (or recurse through) blocks not in our block cost
2672 // map and thus not part of the duplication cost being considered.
2673 auto BBCostIt = BBCostMap.find(N.getBlock());
2674 if (BBCostIt == BBCostMap.end())
2675 return 0;
2676
2677 // Lookup this node to see if we already computed its cost.
2678 auto DTCostIt = DTCostMap.find(&N);
2679 if (DTCostIt != DTCostMap.end())
2680 return DTCostIt->second;
2681
2682 // If not, we have to compute it. We can't use insert above and update
2683 // because computing the cost may insert more things into the map.
2684 InstructionCost Cost = std::accumulate(
2685 N.begin(), N.end(), BBCostIt->second,
2686 [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2687 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2688 });
2689 bool Inserted = DTCostMap.insert({&N, Cost}).second;
2690 (void)Inserted;
2691 assert(Inserted && "Should not insert a node while visiting children!");
2692 return Cost;
2693}
2694
2695/// Turns a select instruction into implicit control flow branch,
2696/// making the following replacement:
2697///
2698/// head:
2699/// --code before select--
2700/// select %cond, %trueval, %falseval
2701/// --code after select--
2702///
2703/// into
2704///
2705/// head:
2706/// --code before select--
2707/// br i1 %cond, label %then, label %tail
2708///
2709/// then:
2710/// br %tail
2711///
2712/// tail:
2713/// phi [ %trueval, %then ], [ %falseval, %head]
2714/// unreachable
2715///
2716/// It also makes all relevant DT and LI updates, so that all structures are in
2717/// valid state after this transform.
2719 LoopInfo &LI, MemorySSAUpdater *MSSAU,
2720 AssumptionCache *AC) {
2721 LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n");
2722 BasicBlock *HeadBB = SI->getParent();
2723
2724 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2725 SplitBlockAndInsertIfThen(SI->getCondition(), SI, false,
2726 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2727 auto *CondBr = cast<BranchInst>(HeadBB->getTerminator());
2728 BasicBlock *ThenBB = CondBr->getSuccessor(0),
2729 *TailBB = CondBr->getSuccessor(1);
2730 if (MSSAU)
2731 MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
2732
2733 PHINode *Phi =
2734 PHINode::Create(SI->getType(), 2, "unswitched.select", SI->getIterator());
2735 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2736 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2737 Phi->setDebugLoc(SI->getDebugLoc());
2738 SI->replaceAllUsesWith(Phi);
2739 SI->eraseFromParent();
2740
2741 if (MSSAU && VerifyMemorySSA)
2742 MSSAU->getMemorySSA()->verifyMemorySSA();
2743
2744 ++NumSelects;
2745 return CondBr;
2746}
2747
2748/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2749/// making the following replacement:
2750///
2751/// --code before guard--
2752/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2753/// --code after guard--
2754///
2755/// into
2756///
2757/// --code before guard--
2758/// br i1 %cond, label %guarded, label %deopt
2759///
2760/// guarded:
2761/// --code after guard--
2762///
2763/// deopt:
2764/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2765/// unreachable
2766///
2767/// It also makes all relevant DT and LI updates, so that all structures are in
2768/// valid state after this transform.
2770 DominatorTree &DT, LoopInfo &LI,
2771 MemorySSAUpdater *MSSAU) {
2772 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2773 BasicBlock *CheckBB = GI->getParent();
2774
2775 if (MSSAU && VerifyMemorySSA)
2776 MSSAU->getMemorySSA()->verifyMemorySSA();
2777
2778 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2779 Instruction *DeoptBlockTerm =
2781 GI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2782 BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2783 // SplitBlockAndInsertIfThen inserts control flow that branches to
2784 // DeoptBlockTerm if the condition is true. We want the opposite.
2785 CheckBI->swapSuccessors();
2786
2787 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2788 GuardedBlock->setName("guarded");
2789 CheckBI->getSuccessor(1)->setName("deopt");
2790 BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2791
2792 if (MSSAU)
2793 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2794
2795 GI->moveBefore(DeoptBlockTerm->getIterator());
2797
2798 if (MSSAU) {
2799 MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2800 MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2801 if (VerifyMemorySSA)
2802 MSSAU->getMemorySSA()->verifyMemorySSA();
2803 }
2804
2805 if (VerifyLoopInfo)
2806 LI.verify(DT);
2807 ++NumGuards;
2808 return CheckBI;
2809}
2810
2811/// Cost multiplier is a way to limit potentially exponential behavior
2812/// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
2813/// candidates available. Also accounting for the number of "sibling" loops with
2814/// the idea to account for previous unswitches that already happened on this
2815/// cluster of loops. There was an attempt to keep this formula simple,
2816/// just enough to limit the worst case behavior. Even if it is not that simple
2817/// now it is still not an attempt to provide a detailed heuristic size
2818/// prediction.
2819///
2820/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2821/// unswitch candidates, making adequate predictions instead of wild guesses.
2822/// That requires knowing not just the number of "remaining" candidates but
2823/// also costs of unswitching for each of these candidates.
2825 const Instruction &TI, const Loop &L, const LoopInfo &LI,
2826 const DominatorTree &DT,
2827 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
2828
2829 // Guards and other exiting conditions do not contribute to exponential
2830 // explosion as soon as they dominate the latch (otherwise there might be
2831 // another path to the latch remaining that does not allow to eliminate the
2832 // loop copy on unswitch).
2833 const BasicBlock *Latch = L.getLoopLatch();
2834 const BasicBlock *CondBlock = TI.getParent();
2835 if (DT.dominates(CondBlock, Latch) &&
2836 (isGuard(&TI) ||
2837 (TI.isTerminator() &&
2838 llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2839 return L.contains(SuccBB);
2840 }) <= 1))) {
2841 NumCostMultiplierSkipped++;
2842 return 1;
2843 }
2844
2845 auto *ParentL = L.getParentLoop();
2846 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2847 : std::distance(LI.begin(), LI.end()));
2848 // Count amount of clones that all the candidates might cause during
2849 // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
2850 // cases.
2851 int UnswitchedClones = 0;
2852 for (const auto &Candidate : UnswitchCandidates) {
2853 const Instruction *CI = Candidate.TI;
2854 const BasicBlock *CondBlock = CI->getParent();
2855 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2856 if (isa<SelectInst>(CI)) {
2857 UnswitchedClones++;
2858 continue;
2859 }
2860 if (isGuard(CI)) {
2861 if (!SkipExitingSuccessors)
2862 UnswitchedClones++;
2863 continue;
2864 }
2865 int NonExitingSuccessors =
2866 llvm::count_if(successors(CondBlock),
2867 [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
2868 return !SkipExitingSuccessors || L.contains(SuccBB);
2869 });
2870 UnswitchedClones += Log2_32(NonExitingSuccessors);
2871 }
2872
2873 // Ignore up to the "unscaled candidates" number of unswitch candidates
2874 // when calculating the power-of-two scaling of the cost. The main idea
2875 // with this control is to allow a small number of unswitches to happen
2876 // and rely more on siblings multiplier (see below) when the number
2877 // of candidates is small.
2878 unsigned ClonesPower =
2879 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2880
2881 // Allowing top-level loops to spread a bit more than nested ones.
2882 int SiblingsMultiplier =
2883 std::max((ParentL ? SiblingsCount
2884 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2885 1);
2886 // Compute the cost multiplier in a way that won't overflow by saturating
2887 // at an upper bound.
2888 int CostMultiplier;
2889 if (ClonesPower > Log2_32(UnswitchThreshold) ||
2890 SiblingsMultiplier > UnswitchThreshold)
2891 CostMultiplier = UnswitchThreshold;
2892 else
2893 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2894 (int)UnswitchThreshold);
2895
2896 LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2897 << " (siblings " << SiblingsMultiplier << " * clones "
2898 << (1 << ClonesPower) << ")"
2899 << " for unswitch candidate: " << TI << "\n");
2900 return CostMultiplier;
2901}
2902
2905 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
2906 const Loop &L, const LoopInfo &LI, AAResults &AA,
2907 const MemorySSAUpdater *MSSAU) {
2908 assert(UnswitchCandidates.empty() && "Should be!");
2909
2910 auto AddUnswitchCandidatesForInst = [&](Instruction *I, Value *Cond) {
2912 if (isa<Constant>(Cond))
2913 return;
2914 if (L.isLoopInvariant(Cond)) {
2915 UnswitchCandidates.push_back({I, {Cond}});
2916 return;
2917 }
2919 TinyPtrVector<Value *> Invariants =
2921 L, *static_cast<Instruction *>(Cond), LI);
2922 if (!Invariants.empty())
2923 UnswitchCandidates.push_back({I, std::move(Invariants)});
2924 }
2925 };
2926
2927 // Whether or not we should also collect guards in the loop.
2928 bool CollectGuards = false;
2929 if (UnswitchGuards) {
2930 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
2931 L.getHeader()->getParent()->getParent(), Intrinsic::experimental_guard);
2932 if (GuardDecl && !GuardDecl->use_empty())
2933 CollectGuards = true;
2934 }
2935
2936 for (auto *BB : L.blocks()) {
2937 if (LI.getLoopFor(BB) != &L)
2938 continue;
2939
2940 for (auto &I : *BB) {
2941 if (auto *SI = dyn_cast<SelectInst>(&I)) {
2942 auto *Cond = SI->getCondition();
2943 // Do not unswitch vector selects and logical and/or selects
2944 if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
2945 AddUnswitchCandidatesForInst(SI, Cond);
2946 } else if (CollectGuards && isGuard(&I)) {
2947 auto *Cond =
2948 skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
2949 // TODO: Support AND, OR conditions and partial unswitching.
2950 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2951 UnswitchCandidates.push_back({&I, {Cond}});
2952 }
2953 }
2954
2955 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2956 // We can only consider fully loop-invariant switch conditions as we need
2957 // to completely eliminate the switch after unswitching.
2958 if (!isa<Constant>(SI->getCondition()) &&
2959 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2960 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2961 continue;
2962 }
2963
2964 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2965 if (!BI || !BI->isConditional() ||
2966 BI->getSuccessor(0) == BI->getSuccessor(1))
2967 continue;
2968
2969 AddUnswitchCandidatesForInst(BI, BI->getCondition());
2970 }
2971
2972 if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
2973 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2974 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2975 })) {
2976 MemorySSA *MSSA = MSSAU->getMemorySSA();
2977 if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2978 LLVM_DEBUG(
2979 dbgs() << "simple-loop-unswitch: Found partially invariant condition "
2980 << *Info->InstToDuplicate[0] << "\n");
2981 PartialIVInfo = *Info;
2982 PartialIVCondBranch = L.getHeader()->getTerminator();
2983 TinyPtrVector<Value *> ValsToDuplicate;
2984 llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
2985 UnswitchCandidates.push_back(
2986 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2987 }
2988 }
2989 return !UnswitchCandidates.empty();
2990}
2991
2992/// Tries to canonicalize condition described by:
2993///
2994/// br (LHS pred RHS), label IfTrue, label IfFalse
2995///
2996/// into its equivalent where `Pred` is something that we support for injected
2997/// invariants (so far it is limited to ult), LHS in canonicalized form is
2998/// non-invariant and RHS is an invariant.
3000 Value *&LHS, Value *&RHS,
3001 BasicBlock *&IfTrue,
3002 BasicBlock *&IfFalse,
3003 const Loop &L) {
3004 if (!L.contains(IfTrue)) {
3005 Pred = ICmpInst::getInversePredicate(Pred);
3006 std::swap(IfTrue, IfFalse);
3007 }
3008
3009 // Move loop-invariant argument to RHS position.
3010 if (L.isLoopInvariant(LHS)) {
3011 Pred = ICmpInst::getSwappedPredicate(Pred);
3012 std::swap(LHS, RHS);
3013 }
3014
3015 if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {
3016 // Turn "x >=s 0" into "x <u UMIN_INT"
3017 Pred = ICmpInst::ICMP_ULT;
3018 RHS = ConstantInt::get(
3019 RHS->getContext(),
3021 }
3022}
3023
3024/// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )
3025/// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by
3026/// injecting a loop-invariant condition.
3028 const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
3029 const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
3030 if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
3031 return false;
3032 // TODO: Support other predicates.
3033 if (Pred != ICmpInst::ICMP_ULT)
3034 return false;
3035 // TODO: Support non-loop-exiting branches?
3036 if (!L.contains(IfTrue) || L.contains(IfFalse))
3037 return false;
3038 // FIXME: For some reason this causes problems with MSSA updates, need to
3039 // investigate why. So far, just don't unswitch latch.
3040 if (L.getHeader() == IfTrue)
3041 return false;
3042 return true;
3043}
3044
3045/// Returns true, if metadata on \p BI allows us to optimize branching into \p
3046/// TakenSucc via injection of invariant conditions. The branch should be not
3047/// enough and not previously unswitched, the information about this comes from
3048/// the metadata.
3050 const BasicBlock *TakenSucc) {
3051 SmallVector<uint32_t> Weights;
3052 if (!extractBranchWeights(*BI, Weights))
3053 return false;
3055 BranchProbability LikelyTaken(T - 1, T);
3056
3057 assert(Weights.size() == 2 && "Unexpected profile data!");
3058 size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
3059 auto Num = Weights[Idx];
3060 auto Denom = Weights[0] + Weights[1];
3061 // Degenerate or overflowed metadata.
3062 if (Denom == 0 || Num > Denom)
3063 return false;
3064 BranchProbability ActualTaken(Num, Denom);
3065 if (LikelyTaken > ActualTaken)
3066 return false;
3067 return true;
3068}
3069
3070/// Materialize pending invariant condition of the given candidate into IR. The
3071/// injected loop-invariant condition implies the original loop-variant branch
3072/// condition, so the materialization turns
3073///
3074/// loop_block:
3075/// ...
3076/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3077///
3078/// into
3079///
3080/// preheader:
3081/// %invariant_cond = LHS pred RHS
3082/// ...
3083/// loop_block:
3084/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
3085/// OriginalCheck:
3086/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3087/// ...
3088static NonTrivialUnswitchCandidate
3089injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
3090 DominatorTree &DT, LoopInfo &LI,
3091 AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
3092 assert(Candidate.hasPendingInjection() && "Nothing to inject!");
3093 BasicBlock *Preheader = L.getLoopPreheader();
3094 assert(Preheader && "Loop is not in simplified form?");
3095 assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&
3096 "Unswitching branch of inner loop!");
3097
3098 auto Pred = Candidate.PendingInjection->Pred;
3099 auto *LHS = Candidate.PendingInjection->LHS;
3100 auto *RHS = Candidate.PendingInjection->RHS;
3101 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3102 auto *TI = cast<BranchInst>(Candidate.TI);
3103 auto *BB = Candidate.TI->getParent();
3104 auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3105 : TI->getSuccessor(0);
3106 // FIXME: Remove this once limitation on successors is lifted.
3107 assert(L.contains(InLoopSucc) && "Not supported yet!");
3108 assert(!L.contains(OutOfLoopSucc) && "Not supported yet!");
3109 auto &Ctx = BB->getContext();
3110
3111 IRBuilder<> Builder(Preheader->getTerminator());
3112 assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!");
3113 if (LHS->getType() != RHS->getType()) {
3114 if (LHS->getType()->getIntegerBitWidth() <
3116 LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide");
3117 else
3118 RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide");
3119 }
3120 // Do not use builder here: CreateICmp may simplify this into a constant and
3121 // unswitching will break. Better optimize it away later.
3122 auto *InjectedCond =
3123 ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
3124 Preheader->getTerminator()->getIterator());
3125
3126 BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
3127 BB->getParent(), InLoopSucc);
3128 Builder.SetInsertPoint(TI);
3129 auto *InvariantBr =
3130 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3131
3132 Builder.SetInsertPoint(CheckBlock);
3133 Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
3134 TI->getSuccessor(1));
3135 TI->eraseFromParent();
3136
3137 // Fixup phis.
3138 for (auto &I : *InLoopSucc) {
3139 auto *PN = dyn_cast<PHINode>(&I);
3140 if (!PN)
3141 break;
3142 auto *Inc = PN->getIncomingValueForBlock(BB);
3143 PN->addIncoming(Inc, CheckBlock);
3144 }
3145 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3146
3148 { DominatorTree::Insert, BB, CheckBlock },
3149 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3150 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3151 { DominatorTree::Delete, BB, OutOfLoopSucc }
3152 };
3153
3154 DT.applyUpdates(DTUpdates);
3155 if (MSSAU)
3156 MSSAU->applyUpdates(DTUpdates, DT);
3157 L.addBasicBlockToLoop(CheckBlock, LI);
3158
3159#ifndef NDEBUG
3160 DT.verify();
3161 LI.verify(DT);
3162 if (MSSAU && VerifyMemorySSA)
3163 MSSAU->getMemorySSA()->verifyMemorySSA();
3164#endif
3165
3166 // TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
3167 // higher because we have just inserted a new block. Need to think how to
3168 // adjust the cost of injected candidates when it was first computed.
3169 LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr
3170 << " and considering it for unswitching.");
3171 ++NumInvariantConditionsInjected;
3172 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3173 Candidate.Cost);
3174}
3175
3176/// Given chain of loop branch conditions looking like:
3177/// br (Variant < Invariant1)
3178/// br (Variant < Invariant2)
3179/// br (Variant < Invariant3)
3180/// ...
3181/// collect set of invariant conditions on which we want to unswitch, which
3182/// look like:
3183/// Invariant1 <= Invariant2
3184/// Invariant2 <= Invariant3
3185/// ...
3186/// Though they might not immediately exist in the IR, we can still inject them.
3188 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
3190 const DominatorTree &DT) {
3191
3193 assert(ICmpInst::isStrictPredicate(Pred));
3194 if (Compares.size() < 2)
3195 return false;
3196 ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate(Pred);
3197 for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
3198 Next != Compares.end(); ++Prev, ++Next) {
3199 Value *LHS = Next->Invariant;
3200 Value *RHS = Prev->Invariant;
3201 BasicBlock *InLoopSucc = Prev->InLoopSucc;
3202 InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);
3203 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3204 std::nullopt, std::move(ToInject));
3205 UnswitchCandidates.push_back(std::move(Candidate));
3206 }
3207 return true;
3208}
3209
3210/// Collect unswitch candidates by invariant conditions that are not immediately
3211/// present in the loop. However, they can be injected into the code if we
3212/// decide it's profitable.
3213/// An example of such conditions is following:
3214///
3215/// for (...) {
3216/// x = load ...
3217/// if (! x <u C1) break;
3218/// if (! x <u C2) break;
3219/// <do something>
3220/// }
3221///
3222/// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <=
3223/// C2" automatically implies "x <u C2", so we can get rid of one of
3224/// loop-variant checks in unswitched loop version.
// NOTE(review): the signature opening is elided in this rendering; per the
// declaration index it is collectUnswitchCandidatesWithInjections(
// UnswitchCandidates, PartialIVInfo, PartialIVCondBranch, L, DT, LI, AA,
// MSSAU) — confirm against the original source.
3227 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
3228 const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
3229 const MemorySSAUpdater *MSSAU) {
// NOTE(review): the condition guarding this early return is elided —
// presumably the InjectInvariantConditions cl::opt; verify upstream.
3231 return false;
3232
// Unreachable loops have no meaningful dominator information to traverse.
3233 if (!DT.isReachableFromEntry(L.getHeader()))
3234 return false;
3235 auto *Latch = L.getLoopLatch();
3236 // Need to have a single latch and a preheader.
3237 if (!Latch)
3238 return false;
3239 assert(L.getLoopPreheader() && "Must have a preheader!");
3240
// NOTE(review): the declaration of CandidatesULT (a map from stripped LHS
// value to its collected CompareDescs) is elided here; verify upstream.
3242 // Traverse the conditions that dominate latch (and therefore dominate each
3243 // other).
3244 for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
3245 DTN = DTN->getIDom()) {
3246 CmpPredicate Pred;
3247 Value *LHS = nullptr, *RHS = nullptr;
3248 BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
3249 auto *BB = DTN->getBlock();
3250 // Ignore inner loops.
3251 if (LI.getLoopFor(BB) != &L)
3252 continue;
3253 auto *Term = BB->getTerminator();
// Only conditional branches on an integer icmp are candidates.
3254 if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
3255 m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
3256 continue;
3257 if (!LHS->getType()->isIntegerTy())
3258 continue;
// Normalize (Pred, LHS, RHS, successors) into the canonical "variant < invariant"
// form before running the profitability filters below.
3259 canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse,
3260 L);
3261 if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L))
3262 continue;
// Branch-weight metadata may indicate injection would not pay off.
3263 if (!shouldTryInjectBasingOnMetadata(cast<BranchInst>(Term), IfTrue))
3264 continue;
3265 // Strip ZEXT for unsigned predicate.
3266 // TODO: once signed predicates are supported, also strip SEXT.
3267 CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue);
3268 while (auto *Zext = dyn_cast<ZExtInst>(LHS))
3269 LHS = Zext->getOperand(0);
// Group compares by their (zext-stripped) variant operand so that chains
// over the same value can be paired up below.
3270 CandidatesULT[LHS].push_back(Desc);
3271 }
3272
3273 bool Found = false;
// NOTE(review): the call opening ("Found |= insertCandidatesWithPendingInjections(")
// is elided on the line between these two; verify upstream.
3274 for (auto &It : CandidatesULT)
3276 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3277 return Found;
3278}
3279
// NOTE(review): the function header is elided in this rendering; per the
// declaration index it is
// static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) —
// a legality check that returns true iff the loop may be cloned for
// non-trivial unswitching. Confirm against the original source.
3281 if (!L.isSafeToClone())
3282 return false;
// Reject loops containing instructions that cannot be duplicated across
// blocks: cross-block token uses and convergent calls.
3283 for (auto *BB : L.blocks())
3284 for (auto &I : *BB) {
3285 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
3286 return false;
3287 if (auto *CB = dyn_cast<CallBase>(&I)) {
3288 assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().");
3289 if (CB->isConvergent())
3290 return false;
3291 }
3292 }
3293
3294 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
3295 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
3296 // irreducible control flow into reducible control flow and introduce new
3297 // loops "out of thin air". If we ever discover important use cases for doing
3298 // this, we can add support to loop unswitch, but it is a lot of complexity
3299 // for what seems little or no real world benefit.
3300 LoopBlocksRPO RPOT(&L);
3301 RPOT.perform(&LI);
3302 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
3303 return false;
3304
// NOTE(review): the declaration of ExitBlocks (a SmallVector of BasicBlock*)
// is elided on the preceding line; verify upstream.
3306 L.getUniqueExitBlocks(ExitBlocks);
3307 // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
3308 // instruction as we don't know how to split those exit blocks.
3309 // FIXME: We should teach SplitBlock to handle this and remove this
3310 // restriction.
3311 for (auto *ExitBB : ExitBlocks) {
3312 auto It = ExitBB->getFirstNonPHIIt();
3313 if (isa<CleanupPadInst>(It) || isa<CatchSwitchInst>(It)) {
3314 LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
3315 "in exit block\n");
3316 return false;
3317 }
3318 }
3319
3320 return true;
3321}
3322
// Picks, among all collected candidates, the one with the lowest estimated
// unswitching cost (after any cost-multiplier scaling). Does not itself check
// the cost against UnswitchThreshold — the caller does that.
3323static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
3324 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
3325 const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
3326 const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
3327 // Given that unswitching these terminators will require duplicating parts of
3328 // the loop, so we need to be able to model that cost. Compute the ephemeral
3329 // values and set up a data structure to hold per-BB costs. We cache each
3330 // block's cost so that we don't recompute this when considering different
3331 // subsets of the loop for duplication during unswitching.
// NOTE(review): the declarations of EphValues and BBCostMap are elided in
// this rendering (lines around here); verify upstream.
3333 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
3335
3336 // Compute the cost of each block, as well as the total loop cost. Also, bail
3337 // out if we see instructions which are incompatible with loop unswitching
3338 // (convergent, noduplicate, or cross-basic-block tokens).
3339 // FIXME: We might be able to safely handle some of these in non-duplicated
3340 // regions.
// NOTE(review): this is the truncated selection of the TTI cost kind — for
// minsize functions a code-size-oriented kind is presumably chosen; the
// surrounding lines are elided, verify upstream.
3342 L.getHeader()->getParent()->hasMinSize()
3345 InstructionCost LoopCost = 0;
3346 for (auto *BB : L.blocks()) {
// NOTE(review): the per-block "InstructionCost Cost = 0;" declaration and the
// per-instruction cost accumulation line are elided here; verify upstream.
3348 for (auto &I : *BB) {
// Ephemeral values (only feeding assumes) will vanish, so don't price them.
3349 if (EphValues.count(&I))
3350 continue;
3352 }
3353 assert(Cost >= 0 && "Must not have negative costs!");
3354 LoopCost += Cost;
3355 assert(LoopCost >= 0 && "Must not have negative loop costs!");
3356 BBCostMap[BB] = Cost;
3357 }
3358 LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
3359
3360 // Now we find the best candidate by searching for the one with the following
3361 // properties in order:
3362 //
3363 // 1) An unswitching cost below the threshold
3364 // 2) The smallest number of duplicated unswitch candidates (to avoid
3365 // creating redundant subsequent unswitching)
3366 // 3) The smallest cost after unswitching.
3367 //
3368 // We prioritize reducing fanout of unswitch candidates provided the cost
3369 // remains below the threshold because this has a multiplicative effect.
3370 //
3371 // This requires memoizing each dominator subtree to avoid redundant work.
3372 //
3373 // FIXME: Need to actually do the number of candidates part above.
// NOTE(review): the declaration of DTCostMap (domtree-node -> memoized
// subtree cost) is elided here; verify upstream.
3375 // Given a terminator which might be unswitched, computes the non-duplicated
3376 // cost for that terminator.
3377 auto ComputeUnswitchedCost = [&](Instruction &TI,
3378 bool FullUnswitch) -> InstructionCost {
3379 // Unswitching selects unswitches the entire loop.
3380 if (isa<SelectInst>(TI))
3381 return LoopCost;
3382
3383 BasicBlock &BB = *TI.getParent();
// NOTE(review): the declaration of Visited (SmallPtrSet of successors) and
// the "InstructionCost Cost = 0;" init are elided around here; verify.
3385
3387 for (BasicBlock *SuccBB : successors(&BB)) {
3388 // Don't count successors more than once.
3389 if (!Visited.insert(SuccBB).second)
3390 continue;
3391
3392 // If this is a partial unswitch candidate, then it must be a conditional
3393 // branch with a condition of either `or`, `and`, their corresponding
3394 // select forms or partially invariant instructions. In that case, one of
3395 // the successors is necessarily duplicated, so don't even try to remove
3396 // its cost.
3397 if (!FullUnswitch) {
3398 auto &BI = cast<BranchInst>(TI);
3399 Value *Cond = skipTrivialSelect(BI.getCondition());
// For logical-and the false successor is the duplicated one; for
// logical-or the true successor; otherwise the partially-invariant case is
// decided by PartialIVInfo.KnownValue.
3400 if (match(Cond, m_LogicalAnd())) {
3401 if (SuccBB == BI.getSuccessor(1))
3402 continue;
3403 } else if (match(Cond, m_LogicalOr())) {
3404 if (SuccBB == BI.getSuccessor(0))
3405 continue;
3406 } else if ((PartialIVInfo.KnownValue->isOneValue() &&
3407 SuccBB == BI.getSuccessor(0)) ||
3408 (!PartialIVInfo.KnownValue->isOneValue() &&
3409 SuccBB == BI.getSuccessor(1)))
3410 continue;
3411 }
3412
3413 // This successor's domtree will not need to be duplicated after
3414 // unswitching if the edge to the successor dominates it (and thus the
3415 // entire tree). This essentially means there is no other path into this
3416 // subtree and so it will end up live in only one clone of the loop.
3417 if (SuccBB->getUniquePredecessor() ||
3418 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
3419 return PredBB == &BB || DT.dominates(SuccBB, PredBB);
3420 })) {
3421 Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
3422 assert(Cost <= LoopCost &&
3423 "Non-duplicated cost should never exceed total loop cost!");
3424 }
3425 }
3426
3427 // Now scale the cost by the number of unique successors minus one. We
3428 // subtract one because there is already at least one copy of the entire
3429 // loop. This is computing the new cost of unswitching a condition.
3430 // Note that guards always have 2 unique successors that are implicit and
3431 // will be materialized if we decide to unswitch it.
3432 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
3433 assert(SuccessorsCount > 1 &&
3434 "Cannot unswitch a condition without multiple distinct successors!");
3435 return (LoopCost - Cost) * (SuccessorsCount - 1);
3436 };
3437
3438 std::optional<NonTrivialUnswitchCandidate> Best;
3439 for (auto &Candidate : UnswitchCandidates) {
3440 Instruction &TI = *Candidate.TI;
3441 ArrayRef<Value *> Invariants = Candidate.Invariants;
3442 BranchInst *BI = dyn_cast<BranchInst>(&TI);
// A candidate is a full unswitch when it is not a branch (switch/select/guard),
// has a pending injection, or its single invariant IS the branch condition.
3443 bool FullUnswitch =
3444 !BI || Candidate.hasPendingInjection() ||
3445 (Invariants.size() == 1 &&
3446 Invariants[0] == skipTrivialSelect(BI->getCondition()));
3447 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3448 // Calculate cost multiplier which is a tool to limit potentially
3449 // exponential behavior of loop-unswitch.
// NOTE(review): the guard opening this scope is elided — presumably
// "if (EnableUnswitchCostMultiplier) {" matching the "} else {" below; verify.
3451 int CostMultiplier =
3452 CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
3453 assert(
3454 (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
3455 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3456 CandidateCost *= CostMultiplier;
3457 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3458 << " (multiplier: " << CostMultiplier << ")"
3459 << " for unswitch candidate: " << TI << "\n");
3460 } else {
3461 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3462 << " for unswitch candidate: " << TI << "\n");
3463 }
3464
// Strictly-less comparison: ties keep the earlier candidate.
3465 if (!Best || CandidateCost < Best->Cost) {
3466 Best = Candidate;
3467 Best->Cost = CandidateCost;
3468 }
3469 }
// Callers only invoke this with a non-empty candidate list.
3470 assert(Best && "Must be!");
3471 return *Best;
3472}
3473
3474// Insert a freeze on an unswitched branch if all is true:
3475// 1. freeze-loop-unswitch-cond option is true
3476// 2. The branch may not execute in the loop pre-transformation. If a branch may
3477// not execute and could cause UB, it would always cause UB if it is hoisted outside
3478// of the loop. Insert a freeze to prevent this case.
3479// 3. The branch condition may be poison or undef
// NOTE(review): the signature opening is elided in this rendering; per the
// declaration index it is
// static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT,
// AssumptionCache &AC) — confirm against the original source.
3481 AssumptionCache &AC) {
3482 assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI));
// NOTE(review): the condition guarding this early return is elided —
// presumably "if (!FreezeLoopUnswitchCond)" (rule 1 above); verify upstream.
3484 return false;
3485
// Rule 2: no freeze needed when the terminator is guaranteed to execute on
// every loop iteration, since hoisting it cannot introduce new UB.
3486 ICFLoopSafetyInfo SafetyInfo;
3487 SafetyInfo.computeLoopSafetyInfo(&L);
3488 if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
3489 return false;
3490
// Extract the (select-stripped) condition from either terminator kind.
3491 Value *Cond;
3492 if (BranchInst *BI = dyn_cast<BranchInst>(&TI))
3493 Cond = skipTrivialSelect(BI->getCondition());
3494 else
3495 Cond = skipTrivialSelect(cast<SwitchInst>(&TI)->getCondition());
// NOTE(review): the opening of this final return is elided — presumably
// "return !isGuaranteedNotToBeUndefOrPoison(" (rule 3 above), queried at the
// preheader terminator where the unswitched branch will live; verify.
3497 Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3498}
3499
// Collects all non-trivial unswitch candidates (direct and injectable), picks
// the cheapest one, and performs the unswitch if it is under the threshold.
// Returns true iff the loop was transformed.
// NOTE(review): the signature opening is elided in this rendering; per the
// declaration index it is
// static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
// AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI,
// ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater) —
// confirm against the original source.
3501 AssumptionCache &AC, AAResults &AA,
3503 MemorySSAUpdater *MSSAU,
3504 LPMUpdater &LoopUpdater) {
3505 // Collect all invariant conditions within this loop (as opposed to an inner
3506 // loop which would be handled when visiting that inner loop).
// NOTE(review): the declaration of UnswitchCandidates (SmallVector of
// NonTrivialUnswitchCandidate) is elided on the preceding line; verify.
3508 IVConditionInfo PartialIVInfo;
3509 Instruction *PartialIVCondBranch = nullptr;
3510 collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
3511 PartialIVCondBranch, L, LI, AA, MSSAU);
// Injection-based candidates can be disabled per-loop via metadata.
3512 if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.injection.disable"))
3513 collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
3514 PartialIVCondBranch, L, DT, LI, AA,
3515 MSSAU);
3516 // If we didn't find any candidates, we're done.
3517 if (UnswitchCandidates.empty())
3518 return false;
3519
3520 LLVM_DEBUG(
3521 dbgs() << "Considering " << UnswitchCandidates.size()
3522 << " non-trivial loop invariant conditions for unswitching.\n");
3523
3524 NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
3525 UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
3526
3527 assert(Best.TI && "Failed to find loop unswitch candidate");
3528 assert(Best.Cost && "Failed to compute cost");
3529
// Even the cheapest candidate may be too expensive to unswitch.
3530 if (*Best.Cost >= UnswitchThreshold) {
3531 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost
3532 << "\n");
3533 return false;
3534 }
3535
// If the winner was an injectable candidate, materialize its invariant
// condition into the IR now.
3536 bool InjectedCondition = false;
3537 if (Best.hasPendingInjection()) {
3538 Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
3539 InjectedCondition = true;
3540 }
3541 assert(!Best.hasPendingInjection() &&
3542 "All injections should have been done by now!");
3543
// PartialIVInfo only applies if the chosen candidate IS the partially
// invariant branch; otherwise drop its duplication list.
3544 if (Best.TI != PartialIVCondBranch)
3545 PartialIVInfo.InstToDuplicate.clear();
3546
3547 bool InsertFreeze;
3548 if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3549 // If the best candidate is a select, turn it into a branch. Select
3550 // instructions with a poison conditional do not propagate poison, but
3551 // branching on poison causes UB. Insert a freeze on the select
3552 // conditional to prevent UB after turning the select into a branch.
3553 InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
3554 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3555 Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
3556 } else {
3557 // If the best candidate is a guard, turn it into a branch.
3558 if (isGuard(Best.TI))
3559 Best.TI =
3560 turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3561 InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC);
3562 }
3563
3564 LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
3565 << ") terminator: " << *Best.TI << "\n");
3566 unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3567 LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,
3568 InjectedCondition);
3569 return true;
3570}
3571
3572/// Unswitch control flow predicated on loop invariant conditions.
3573///
3574/// This first hoists all branches or switches which are trivial (IE, do not
3575/// require duplicating any part of the loop) out of the loop body. It then
3576/// looks at other loop invariant control flows and tries to unswitch those as
3577/// well by cloning the loop if the result is small enough.
3578///
3579/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3580/// also updated based on the unswitch. The `MSSA` analysis is also updated if
3581/// valid (i.e. its use is enabled).
3582///
3583/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3584/// true, we will attempt to do non-trivial unswitching as well as trivial
3585/// unswitching.
3586///
3587/// The `postUnswitch` function will be run after unswitching is complete
3588/// with information on whether or not the provided loop remains a loop and
3589/// a list of new sibling loops created.
3590///
3591/// If `SE` is non-null, we will update that analysis based on the unswitching
3592/// done.
3593static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
3594 AssumptionCache &AC, AAResults &AA,
3595 TargetTransformInfo &TTI, bool Trivial,
3596 bool NonTrivial, ScalarEvolution *SE,
// NOTE(review): one parameter line is elided here — per the declaration index
// it carries "MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI,"; verify.
3598 BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater) {
3599 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3600 "Loops must be in LCSSA form before unswitching.");
3601
3602 // Must be in loop simplified form: we need a preheader and dedicated exits.
3603 if (!L.isLoopSimplifyForm())
3604 return false;
3605
3606 // Try trivial unswitch first before loop over other basic blocks in the loop.
3607 if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3608 // If we unswitched successfully we will want to clean up the loop before
3609 // processing it further so just mark it as unswitched and return.
3610 postUnswitch(L, LoopUpdater, L.getName(),
3611 /*CurrentLoopValid*/ true, /*PartiallyInvariant*/ false,
3612 /*InjectedCondition*/ false, {});
3613 return true;
3614 }
3615
3616 const Function *F = L.getHeader()->getParent();
3617
3618 // Check whether we should continue with non-trivial conditions.
3619 // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3620 // unswitching for testing and debugging.
3621 // NonTrivial: Parameter that enables non-trivial unswitching for this
3622 // invocation of the transform. But this should be allowed only
3623 // for targets without branch divergence.
3624 //
3625 // FIXME: If divergence analysis becomes available to a loop
3626 // transform, we should allow unswitching for non-trivial uniform
3627 // branches even on targets that have divergence.
3628 // https://bugs.llvm.org/show_bug.cgi?id=48819
// NOTE(review): the initializer is elided — presumably combines
// EnableNonTrivialUnswitch with NonTrivial (and, per the FIXME above, a
// branch-divergence check); verify upstream.
3629 bool ContinueWithNonTrivial =
3631 if (!ContinueWithNonTrivial)
3632 return false;
3633
3634 // Skip non-trivial unswitching for optsize functions.
3635 if (F->hasOptSize())
3636 return false;
3637
3638 // Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
3639 // of the loops L is nested in, and of the loops nested in L are all cold.
3640 auto IsLoopNestCold = [&](const Loop *L) {
3641 // Check L and all of its parent loops.
3642 auto *Parent = L;
3643 while (Parent) {
3644 if (!PSI->isColdBlock(Parent->getHeader(), BFI))
3645 return false;
3646 Parent = Parent->getParentLoop();
3647 }
3648 // Next check all loops nested within L.
// NOTE(review): the declaration of Worklist (SmallVector of const Loop*) is
// elided on the preceding line; verify upstream.
3650 llvm::append_range(Worklist, L->getSubLoops());
3651 while (!Worklist.empty()) {
3652 auto *CurLoop = Worklist.pop_back_val();
3653 if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))
3654 return false;
3655 llvm::append_range(Worklist, CurLoop->getSubLoops());
3656 }
3657 return true;
3658 };
3659
3660 // Skip cold loops in cold loop nests, as unswitching them brings little
3661 // benefit but increases the code size
// PSI/BFI may be null (e.g. no profile); only then is IsLoopNestCold invoked,
// which dereferences PSI unconditionally.
3662 if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {
3663 LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
3664 return false;
3665 }
3666
3667 // Perform legality checks.
// NOTE(review): the condition is elided — presumably
// "if (!isSafeForNoNTrivialUnswitching(L, LI))"; verify upstream.
3669 return false;
3670
3671 // For non-trivial unswitching, because it often creates new loops, we rely on
3672 // the pass manager to iterate on the loops rather than trying to immediately
3673 // reach a fixed point. There is no substantial advantage to iterating
3674 // internally, and if any of the new loops are simplified enough to contain
3675 // trivial unswitching we want to prefer those.
3676
3677 // Try to unswitch the best invariant condition. We prefer this full unswitch to
3678 // a partial unswitch when possible below the threshold.
3679 if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, SE, MSSAU, LoopUpdater))
3680 return true;
3681
3682 // No other opportunities to unswitch.
3683 return false;
3684}
3685
// Loop-pass entry point: wires the cached ProfileSummaryInfo and an optional
// MemorySSAUpdater into unswitchLoop and reports preserved analyses.
// NOTE(review): the signature opening is elided in this rendering —
// presumably "PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L,
// LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, ..."; verify.
3688 LPMUpdater &U) {
3689 Function &F = *L.getHeader()->getParent();
// F is referenced only inside LLVM_DEBUG below; silence unused warnings in
// release builds.
3690 (void)F;
// PSI is optional: only available if the module-level analysis was cached.
3691 ProfileSummaryInfo *PSI = nullptr;
// NOTE(review): the line obtaining the function analysis manager proxy (the
// argument to .getCachedResult) is elided here; verify upstream.
3692 if (auto OuterProxy =
3694 .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
3695 PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
3696 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3697 << "\n");
3698
// Only construct the updater when MemorySSA is available; verify it before
// and after the transform when verification is enabled.
3699 std::optional<MemorySSAUpdater> MSSAU;
3700 if (AR.MSSA) {
3701 MSSAU = MemorySSAUpdater(AR.MSSA);
3702 if (VerifyMemorySSA)
3703 AR.MSSA->verifyMemorySSA();
3704 }
3705 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3706 &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI, U))
3707 return PreservedAnalyses::all();
3708
3709 if (AR.MSSA && VerifyMemorySSA)
3710 AR.MSSA->verifyMemorySSA();
3711
3712 // Historically this pass has had issues with the dominator tree so verify it
3713 // in asserts builds.
3714 assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
3715
3716 auto PA = getLoopPassPreservedAnalyses();
3717 if (AR.MSSA)
3718 PA.preserve<MemorySSAAnalysis>();
3719 return PA;
3720}
3721
// Prints this pass's pipeline representation, including its "nontrivial" and
// "trivial" option flags, e.g. "simple-loop-unswitch<nontrivial;trivial>".
// NOTE(review): the signature opening ("void SimpleLoopUnswitchPass::
// printPipeline(") is elided on the preceding line; verify upstream.
3723 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
// NOTE(review): the delegating call to the PassInfoMixin base printPipeline
// is elided here (only its trailing arguments are visible); verify upstream.
3725 OS, MapClassName2PassName);
3726
// Emit the option list in the standard "<opt;opt>" pipeline syntax, with a
// "no-" prefix for disabled options.
3727 OS << '<';
3728 OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3729 OS << (Trivial ? "" : "no-") << "trivial";
3730 OS << '>';
3731}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
Module.h This file contains the declarations for the Module class.
This defines the Use class.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
uint64_t IntrinsicInst * II
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater)
Unswitch control flow predicated on loop invariant conditions.
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static BranchInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)
Turns a select instruction into implicit control flow branch, making the following replacement:
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, bool CurrentLoopValid, bool PartiallyInvariant, bool InjectedCondition, ArrayRef< Loop * > NewLoops)
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
static Value * skipTrivialSelect(Value *Cond)
static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
static void canonicalizeForInvariantConditionInjection(CmpPredicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition)
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, LPMUpdater &LoopUpdater)
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following replacement:
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
A private abstract base class describing the concept of an individual alias analysis implementation.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
iterator begin() const
Definition: ArrayRef.h:135
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:142
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator end()
Definition: BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:528
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
size_t size() const
Definition: BasicBlock.h:480
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives in, right before MovePos.
Definition: BasicBlock.h:386
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Definition: BasicBlock.h:233
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:494
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1292
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1297
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:23
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:868
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:875
This is an important base class in LLVM.
Definition: Constant.h:43
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
Definition: Constants.cpp:124
A debug info location.
Definition: DebugLoc.h:124
static DebugLoc getCompilerGenerated()
Definition: DebugLoc.h:163
static DebugLoc getDropped()
Definition: DebugLoc.h:164
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition: DenseMap.h:203
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
iterator begin()
Definition: DenseMap.h:78
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:173
iterator end()
Definition: DenseMap.h:87
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:334
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:135
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:133
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:77
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2637
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:247
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1197
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2082
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1551
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:207
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition: IRBuilder.h:1573
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void dropLocation()
Drop the instruction's debug location.
Definition: DebugInfo.cpp:932
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:513
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:428
bool isTerminator() const
Definition: Instruction.h:315
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1718
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:510
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted instruction.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:49
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
iterator end() const
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
StringRef getName() const
Definition: LoopInfo.h:390
Metadata node.
Definition: Metadata.h:1077
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1565
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:607
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:371
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:936
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
LLVM_ABI void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
LLVM_ABI void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
LLVM_ABI void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
LLVM_ABI void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
LLVM_ABI void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
LLVM_ABI MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point, bool CreationMustSucceed=true)
Create a MemoryAccess in MemorySSA at a specified point in a block.
LLVM_ABI void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
LLVM_ABI void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
LLVM_ABI void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
LLVM_ABI void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:702
LLVM_ABI void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right blocks, and so forth).
Definition: MemorySSA.cpp:1905
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:720
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition: MemorySSA.h:768
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:716
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1885
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
The main scalar evolution driver.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LLVM_ABI void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:104
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:279
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:109
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
size_type size() const
Definition: SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:418
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void reserve(size_type N)
Definition: SmallVector.h:664
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:684
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI Instruction::InstListType::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Multiway switch.
BasicBlock * getDefaultDest() const
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM_ABI InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vector, but is general enough to go beyond that when required.
Definition: TinyPtrVector.h:29
void push_back(EltTy NewVal)
bool empty() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
LLVM_ABI unsigned getIntegerBitWidth() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition: ValueMap.h:169
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: ValueMap.h:156
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:390
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1098
iterator_range< use_iterator > uses()
Definition: Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
Definition: Intrinsics.cpp:762
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition: PatternMatch.h:189
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
void stable_sort(R &&Range)
Definition: STLExtras.h:2077
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1770
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:533
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular function.
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2155
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
LLVM_ABI MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Definition: LoopInfo.cpp:1079
Op::Description Desc
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:336
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
Definition: ValueMapper.h:317
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition: STLExtras.h:883
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:80
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition: LoopInfo.cpp:51
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:84
LLVM_ABI bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:58
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:289
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1980
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
LLVM_ABI llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1182
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2139
auto predecessors(const MachineBasicBlock *BB)
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
LLVM_ABI std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
Definition: LoopUtils.cpp:2084
LLVM_ABI bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition: LCSSA.cpp:427
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
#define N
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:29
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:71
Description of the encoding of one expression Op.
Struct to hold information about a partially invariant condition.
Definition: LoopUtils.h:580
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition: LoopUtils.h:583
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition: LoopUtils.h:586
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:217
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:70