69#define DEBUG_TYPE "simple-loop-unswitch"
74STATISTIC(NumBranches,
"Number of branches unswitched");
75STATISTIC(NumSwitches,
"Number of switches unswitched");
76STATISTIC(NumSelects,
"Number of selects turned into branches for unswitching");
77STATISTIC(NumGuards,
"Number of guards turned into branches for unswitching");
78STATISTIC(NumTrivial,
"Number of unswitches that are trivial");
80 NumCostMultiplierSkipped,
81 "Number of unswitch candidates that had their cost multiplier skipped");
83 "Number of invariant conditions injected and unswitched");
87 cl::desc(
"Forcibly enables non-trivial loop unswitching rather than "
88 "following the configuration passed into the pass."));
92 cl::desc(
"The cost threshold for unswitching a loop."));
96 cl::desc(
"Enable unswitch cost multiplier that prohibits exponential "
97 "explosion in nontrivial unswitch."));
100 cl::desc(
"Toplevel siblings divisor for cost multiplier."));
103 cl::desc(
"Number of unswitch candidates that are ignored when calculating "
104 "cost multiplier."));
107 cl::desc(
"If enabled, simple loop unswitching will also consider "
108 "llvm.experimental.guard intrinsics as unswitch candidates."));
110 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
112 cl::desc(
"If enabled, drop make.implicit metadata in unswitched implicit "
113 "null checks to save time analyzing if we can keep it."));
116 cl::desc(
"Max number of memory uses to explore during "
117 "partial unswitching analysis"),
121 cl::desc(
"If enabled, the freeze instruction will be added to condition "
122 "of loop unswitch to prevent miscompilation."));
125 "simple-loop-unswitch-inject-invariant-conditions",
cl::Hidden,
126 cl::desc(
"Whether we should inject new invariants and unswitch them to "
127 "eliminate some existing (non-invariant) conditions."),
131 "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
133 "unswitch on them to eliminate branches that are "
134 "not-taken 1/<this option> times or less."),
145 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
148struct InjectedInvariant {
156 : Pred(Pred),
LHS(
LHS),
RHS(
RHS), InLoopSucc(InLoopSucc) {}
159struct NonTrivialUnswitchCandidate {
162 std::optional<InstructionCost>
Cost;
163 std::optional<InjectedInvariant> PendingInjection;
164 NonTrivialUnswitchCandidate(
166 std::optional<InstructionCost>
Cost = std::nullopt,
167 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
168 : TI(TI), Invariants(Invariants),
Cost(
Cost),
169 PendingInjection(PendingInjection) {};
171 bool hasPendingInjection()
const {
return PendingInjection.has_value(); }
195 assert(!L.isLoopInvariant(&Root) &&
196 "Only need to walk the graph if root itself is not invariant.");
209 for (
Value *OpV :
I.operand_values()) {
211 if (isa<Constant>(OpV))
215 if (L.isLoopInvariant(OpV)) {
226 if (Visited.
insert(OpI).second)
230 }
while (!Worklist.
empty());
237 assert(!isa<Constant>(Invariant) &&
"Why are we unswitching on a constant?");
242 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
245 if (UserI && L.contains(UserI))
256 auto *PN = dyn_cast<PHINode>(&
I);
263 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
280 for (
Value *Inv : Invariants) {
289 Direction ? &NormalSucc : &UnswitchedSucc);
298 for (
auto *Val :
reverse(ToDuplicate)) {
316 auto *DefiningAccess = MemUse->getDefiningAccess();
318 while (L.contains(DefiningAccess->getBlock())) {
321 if (
auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
323 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
325 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
337 Direction ? &NormalSucc : &UnswitchedSucc);
354 for (
auto i : seq<int>(0, PN.getNumOperands())) {
355 assert(PN.getIncomingBlock(i) == &OldExitingBB &&
356 "Found incoming block different from unique predecessor!");
357 PN.setIncomingBlock(i, &OldPH);
374 assert(&ExitBB != &UnswitchedBB &&
375 "Must have different loop exit and unswitched blocks!");
379 PN.getName() +
".split");
380 NewPN->insertBefore(InsertPt);
391 for (
int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
392 if (PN.getIncomingBlock(i) != &OldExitingBB)
398 PN.removeIncomingValue(i);
400 NewPN->addIncoming(
Incoming, &OldPH);
405 PN.replaceAllUsesWith(NewPN);
406 NewPN->addIncoming(&PN, &ExitBB);
419 Loop *OldParentL = L.getParentLoop();
424 L.getExitBlocks(Exits);
425 Loop *NewParentL =
nullptr;
426 for (
auto *ExitBB : Exits)
428 if (!NewParentL || NewParentL->
contains(ExitL))
431 if (NewParentL == OldParentL)
437 "Can only hoist this loop up the nest!");
442 "Parent loop of this loop should contain this loop's preheader!");
457 for (
Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
461 return BB == &Preheader || L.contains(BB);
464 OldContainingL->getBlocksSet().erase(&Preheader);
466 OldContainingL->getBlocksSet().erase(BB);
489 Loop *Current = TopMost;
519 LLVM_DEBUG(
dbgs() <<
" Trying to unswitch branch: " << BI <<
"\n");
526 bool FullUnswitch =
false;
529 if (L.isLoopInvariant(
Cond)) {
533 if (
auto *CondInst = dyn_cast<Instruction>(
Cond))
535 if (Invariants.
empty()) {
542 bool ExitDirection =
true;
543 int LoopExitSuccIdx = 0;
545 if (L.contains(LoopExitBB)) {
546 ExitDirection =
false;
549 if (L.contains(LoopExitBB)) {
554 auto *ContinueBB = BI.
getSuccessor(1 - LoopExitSuccIdx);
557 LLVM_DEBUG(
dbgs() <<
" Loop exit PHI's aren't loop-invariant!\n");
570 "non-full unswitch!\n");
576 dbgs() <<
" unswitching trivial invariant conditions for: " << BI
578 for (
Value *Invariant : Invariants) {
579 dbgs() <<
" " << *Invariant <<
" == true";
580 if (Invariant != Invariants.back())
612 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
614 "A branch's parent isn't a predecessor!");
615 UnswitchedBB = LoopExitBB;
618 SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU,
"",
false);
651 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
655 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
658 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
669 Updates.
push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
681 Term->eraseFromParent();
691 if (UnswitchedBB == LoopExitBB)
695 *ParentBB, *OldPH, FullUnswitch);
706 for (
Value *Invariant : Invariants)
752 LLVM_DEBUG(
dbgs() <<
" Trying to unswitch switch: " << SI <<
"\n");
753 Value *LoopCond = SI.getCondition();
756 if (!L.isLoopInvariant(LoopCond))
759 auto *ParentBB = SI.getParent();
766 auto IsTriviallyUnswitchableExitBlock = [&](
BasicBlock &BBToCheck) {
768 if (L.contains(&BBToCheck))
777 auto *TI = BBToCheck.getTerminator();
778 bool isUnreachable = isa<UnreachableInst>(TI);
779 return !isUnreachable || &*BBToCheck.getFirstNonPHIOrDbg() != TI;
783 for (
auto Case : SI.cases())
784 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
785 ExitCaseIndices.
push_back(Case.getCaseIndex());
789 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
790 DefaultExitBB = SI.getDefaultDest();
791 }
else if (ExitCaseIndices.
empty())
806 if (!ExitL || ExitL->
contains(OuterL))
809 for (
unsigned Index : ExitCaseIndices) {
810 auto CaseI = SI.case_begin() + Index;
813 if (!ExitL || ExitL->
contains(OuterL))
827 SI.setDefaultDest(
nullptr);
835 ExitCases.reserve(ExitCaseIndices.
size());
839 for (
unsigned Index :
reverse(ExitCaseIndices)) {
840 auto CaseI = SI.case_begin() + Index;
843 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
851 if (SI.getNumCases() > 0 &&
853 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
855 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
856 if (!DefaultExitBB) {
860 if (SI.getNumCases() == 0)
861 CommonSuccBB = SI.getDefaultDest();
862 else if (SI.getDefaultDest() != CommonSuccBB)
863 CommonSuccBB =
nullptr;
892 UnswitchedExitBBs.
insert(DefaultExitBB);
900 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
905 for (
auto &ExitCase :
reverse(ExitCases)) {
913 if (UnswitchedExitBBs.
insert(ExitBB).second)
920 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
929 std::get<1>(ExitCase) = SplitExitBB;
934 for (
auto &ExitCase :
reverse(ExitCases)) {
936 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
938 NewSIW.
addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
949 for (
const auto &Case : SI.cases())
952 }
else if (DefaultCaseWeight) {
955 for (
const auto &Case : SI.cases()) {
958 "case weight must be defined as default case weight is defined");
973 bool SkippedFirst = DefaultExitBB ==
nullptr;
974 for (
auto Case : SI.cases()) {
976 "Non-common successor!");
989 }
else if (DefaultExitBB) {
990 assert(SI.getNumCases() > 0 &&
991 "If we had no cases we'd have a common successor!");
996 auto LastCaseI = std::prev(SI.case_end());
998 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
1009 for (
auto *UnswitchedExitBB : UnswitchedExitBBs) {
1013 for (
auto SplitUnswitchedPair : SplitExitBBMap) {
1014 DTUpdates.
push_back({DT.
Delete, ParentBB, SplitUnswitchedPair.first});
1026 assert(DT.
verify(DominatorTree::VerificationLevel::Fast));
1056 bool Changed =
false;
1071 Visited.
insert(CurrentBB);
1078 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1086 if (
auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1090 if (isa<Constant>(SI->getCondition()))
1104 auto *BI = dyn_cast<BranchInst>(CurrentBB->
getTerminator());
1105 if (!BI || BI->isConditional())
1108 CurrentBB = BI->getSuccessor(0);
1112 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1120 if (!BI->isConditional() ||
1135 if (BI->isConditional())
1139 CurrentBB = BI->getSuccessor(0);
1144 }
while (L.contains(CurrentBB) && Visited.
insert(CurrentBB).second);
1182 NewBlocks.
reserve(L.getNumBlocks() + ExitBlocks.
size());
1193 VMap[OldBB] = NewBB;
1201 auto It = DominatingSucc.
find(BB);
1202 return It != DominatingSucc.
end() && It->second != UnswitchedSuccBB;
1206 auto *ClonedPH = CloneBlock(LoopPH);
1209 for (
auto *LoopBB : L.blocks())
1210 if (!SkipBlock(LoopBB))
1216 for (
auto *ExitBB : ExitBlocks) {
1217 if (SkipBlock(ExitBB))
1225 auto *MergeBB =
SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1230 MergeBB->takeName(ExitBB);
1231 ExitBB->setName(
Twine(MergeBB->getName()) +
".split");
1234 auto *ClonedExitBB = CloneBlock(ExitBB);
1235 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1236 "Exit block should have been split to have one successor!");
1237 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1238 "Cloned exit block has the wrong successor!");
1244 std::prev(ClonedExitBB->end())))) {
1251 (isa<PHINode>(
I) || isa<LandingPadInst>(
I) || isa<CatchPadInst>(
I)) &&
1252 "Bad instruction in exit block!");
1254 assert(VMap.
lookup(&
I) == &ClonedI &&
"Mismatch in the value map!");
1258 if (
auto *PN = dyn_cast<PHINode>(&
I))
1265 MergePN->insertBefore(InsertPt);
1266 MergePN->setDebugLoc(InsertPt->getDebugLoc());
1267 I.replaceAllUsesWith(MergePN);
1268 MergePN->addIncoming(&
I, ExitBB);
1269 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1278 Module *M = ClonedPH->getParent()->getParent();
1279 for (
auto *ClonedBB : NewBlocks)
1285 if (
auto *
II = dyn_cast<AssumeInst>(&
I))
1291 for (
auto *LoopBB : L.blocks())
1292 if (SkipBlock(LoopBB))
1294 if (
auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.
lookup(SuccBB)))
1295 for (
PHINode &PN : ClonedSuccBB->phis())
1296 PN.removeIncomingValue(LoopBB,
false);
1300 auto *ClonedParentBB = cast<BasicBlock>(VMap.
lookup(ParentBB));
1302 if (SuccBB == UnswitchedSuccBB)
1305 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.
lookup(SuccBB));
1309 ClonedSuccBB->removePredecessor(ClonedParentBB,
1315 auto *ClonedSuccBB = cast<BasicBlock>(VMap.
lookup(UnswitchedSuccBB));
1316 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1319 Value *ClonedConditionToErase =
nullptr;
1320 if (
auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1321 ClonedConditionToErase = BI->getCondition();
1322 else if (
auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1323 ClonedConditionToErase = SI->getCondition();
1329 if (ClonedConditionToErase)
1336 for (
PHINode &PN : ClonedSuccBB->phis()) {
1340 for (
int i = PN.getNumOperands() - 1; i >= 0; --i) {
1341 if (PN.getIncomingBlock(i) != ClonedParentBB)
1347 PN.removeIncomingValue(i,
false);
1353 for (
auto *ClonedBB : NewBlocks) {
1355 if (SuccSet.
insert(SuccBB).second)
1356 DTUpdates.
push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1371 auto AddClonedBlocksToLoop = [&](
Loop &OrigL,
Loop &ClonedL) {
1372 assert(ClonedL.getBlocks().empty() &&
"Must start with an empty loop!");
1374 for (
auto *BB : OrigL.
blocks()) {
1375 auto *ClonedBB = cast<BasicBlock>(VMap.
lookup(BB));
1376 ClonedL.addBlockEntry(ClonedBB);
1389 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1401 LoopsToClone.
push_back({ClonedRootL, ChildL});
1403 Loop *ClonedParentL, *L;
1404 std::tie(ClonedParentL, L) = LoopsToClone.
pop_back_val();
1407 AddClonedBlocksToLoop(*L, *ClonedL);
1409 LoopsToClone.
push_back({ClonedL, ChildL});
1410 }
while (!LoopsToClone.
empty());
1431 Loop *ClonedL =
nullptr;
1436 auto *ClonedPH = cast<BasicBlock>(VMap.
lookup(OrigPH));
1437 auto *ClonedHeader = cast<BasicBlock>(VMap.
lookup(OrigHeader));
1443 Loop *ParentL =
nullptr;
1447 for (
auto *ExitBB : ExitBlocks)
1448 if (
auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.
lookup(ExitBB)))
1450 ExitLoopMap[ClonedExitBB] = ExitL;
1451 ClonedExitsInLoops.
push_back(ClonedExitBB);
1452 if (!ParentL || (ParentL != ExitL && ParentL->
contains(ExitL)))
1457 "The computed parent loop should always contain (or be) the parent of "
1458 "the original loop.");
1465 for (
auto *BB : OrigL.
blocks())
1466 if (
auto *ClonedBB = cast_or_null<BasicBlock>(VMap.
lookup(BB)))
1467 ClonedLoopBlocks.
insert(ClonedBB);
1478 if (Pred == ClonedPH)
1483 assert(ClonedLoopBlocks.
count(Pred) &&
"Found a predecessor of the loop "
1484 "header other than the preheader "
1485 "that is not part of the loop!");
1490 if (BlocksInClonedLoop.
insert(Pred).second && Pred != ClonedHeader)
1497 if (!BlocksInClonedLoop.
empty()) {
1498 BlocksInClonedLoop.
insert(ClonedHeader);
1500 while (!Worklist.
empty()) {
1503 "Didn't put block into the loop set!");
1511 if (ClonedLoopBlocks.
count(Pred) &&
1512 BlocksInClonedLoop.
insert(Pred).second)
1531 for (
auto *BB : OrigL.
blocks()) {
1532 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.
lookup(BB));
1533 if (!ClonedBB || !BlocksInClonedLoop.
count(ClonedBB))
1545 for (
Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1546 PL->addBlockEntry(ClonedBB);
1553 for (
Loop *ChildL : OrigL) {
1554 auto *ClonedChildHeader =
1555 cast_or_null<BasicBlock>(VMap.
lookup(ChildL->getHeader()));
1556 if (!ClonedChildHeader || !BlocksInClonedLoop.
count(ClonedChildHeader))
1562 for (
auto *ChildLoopBB : ChildL->blocks())
1564 cast<BasicBlock>(VMap.
lookup(ChildLoopBB))) &&
1565 "Child cloned loop has a header within the cloned outer "
1566 "loop but not all of its blocks!");
1581 if (BlocksInClonedLoop.
empty())
1582 UnloopedBlockSet.
insert(ClonedPH);
1583 for (
auto *ClonedBB : ClonedLoopBlocks)
1584 if (!BlocksInClonedLoop.
count(ClonedBB))
1585 UnloopedBlockSet.
insert(ClonedBB);
1591 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1593 return ExitLoopMap.
lookup(
LHS)->getLoopDepth() <
1594 ExitLoopMap.
lookup(
RHS)->getLoopDepth();
1599 while (!UnloopedBlockSet.
empty() && !OrderedClonedExitsInLoops.empty()) {
1600 assert(Worklist.
empty() &&
"Didn't clear worklist!");
1602 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1617 if (!UnloopedBlockSet.
erase(PredBB)) {
1619 (BlocksInClonedLoop.
count(PredBB) || ExitLoopMap.
count(PredBB)) &&
1620 "Predecessor not mapped to a loop!");
1627 bool Inserted = ExitLoopMap.
insert({PredBB, ExitL}).second;
1629 assert(Inserted &&
"Should only visit an unlooped block once!");
1634 }
while (!Worklist.
empty());
1643 for (
auto *BB : llvm::concat<BasicBlock *const>(
1644 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1646 OuterL->addBasicBlockToLoop(BB, LI);
1649 for (
auto &BBAndL : ExitLoopMap) {
1650 auto *BB = BBAndL.first;
1651 auto *OuterL = BBAndL.second;
1653 "Failed to put all blocks into outer loops!");
1660 for (
Loop *ChildL : OrigL) {
1661 auto *ClonedChildHeader =
1662 cast_or_null<BasicBlock>(VMap.
lookup(ChildL->getHeader()));
1663 if (!ClonedChildHeader || BlocksInClonedLoop.
count(ClonedChildHeader))
1667 for (
auto *ChildLoopBB : ChildL->blocks())
1669 "Cloned a child loop header but not all of that loops blocks!");
1673 *ChildL, ExitLoopMap.
lookup(ClonedChildHeader), VMap, LI));
1679 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1683 for (
BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1684 for (
const auto &VMap : VMaps)
1685 if (
BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1688 SuccBB->removePredecessor(ClonedBB);
1701 BB->dropAllReferences();
1704 BB->eraseFromParent();
1721 DeathCandidates.
append(L.blocks().begin(), L.blocks().end());
1722 while (!DeathCandidates.
empty()) {
1726 SuccBB->removePredecessor(BB);
1743 for (
Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1744 for (
auto *BB : DeadBlockSet)
1745 ParentL->getBlocksSet().erase(BB);
1747 [&](
BasicBlock *BB) { return DeadBlockSet.count(BB); });
1753 if (!DeadBlockSet.count(ChildL->getHeader()))
1756 assert(llvm::all_of(ChildL->blocks(),
1757 [&](BasicBlock *ChildBB) {
1758 return DeadBlockSet.count(ChildBB);
1760 "If the child loop header is dead all blocks in the child loop must "
1761 "be dead as well!");
1772 for (
auto *BB : DeadBlockSet) {
1774 assert(!DT.
getNode(BB) &&
"Should already have cleared domtree!");
1775 LI.changeLoopFor(BB,
nullptr);
1781 BB->dropAllReferences();
1786 for (
auto *BB : DeadBlockSet)
1787 BB->eraseFromParent();
1805 auto *PH = L.getLoopPreheader();
1806 auto *Header = L.getHeader();
1820 assert(L.contains(Pred) &&
"Found a predecessor of the loop header other "
1821 "than the preheader that is not part of the "
1827 if (LoopBlockSet.
insert(Pred).second && Pred != Header)
1832 if (LoopBlockSet.
empty())
1833 return LoopBlockSet;
1836 while (!Worklist.
empty()) {
1838 assert(LoopBlockSet.
count(BB) &&
"Didn't put block into the loop set!");
1850 assert(L.contains(InnerL) &&
1851 "Should not reach a loop *outside* this loop!");
1854 auto *InnerPH = InnerL->getLoopPreheader();
1855 assert(L.contains(InnerPH) &&
"Cannot contain an inner loop block "
1856 "but not contain the inner loop "
1858 if (!LoopBlockSet.
insert(InnerPH).second)
1868 for (
auto *InnerBB : InnerL->blocks()) {
1869 if (InnerBB == BB) {
1871 "Block should already be in the set!");
1875 LoopBlockSet.
insert(InnerBB);
1887 if (L.contains(Pred) && LoopBlockSet.
insert(Pred).second)
1891 assert(LoopBlockSet.
count(Header) &&
"Cannot fail to add the header!");
1895 return LoopBlockSet;
1916 auto *PH = L.getLoopPreheader();
1920 Loop *ParentL =
nullptr;
1924 for (
auto *ExitBB : ExitBlocks)
1928 if (!ParentL || (ParentL != ExitL && ParentL->
contains(ExitL)))
1940 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1942 for (
Loop *IL = L.getParentLoop(); IL != ParentL;
1944 IL->getBlocksSet().erase(PH);
1945 for (
auto *BB : L.blocks())
1946 IL->getBlocksSet().erase(BB);
1948 return BB == PH || L.contains(BB);
1953 L.getParentLoop()->removeChildLoop(&L);
1961 auto &
Blocks = L.getBlocksVector();
1963 LoopBlockSet.empty()
1965 : std::stable_partition(
1967 [&](
BasicBlock *BB) { return LoopBlockSet.count(BB); });
1971 if (LoopBlockSet.empty())
1972 UnloopedBlocks.
insert(PH);
1976 L.getBlocksSet().erase(BB);
1987 Loop *PrevExitL = L.getParentLoop();
1989 auto RemoveUnloopedBlocksFromLoop =
1991 for (
auto *BB : UnloopedBlocks)
1992 L.getBlocksSet().erase(BB);
1994 return UnloopedBlocks.count(BB);
1999 while (!UnloopedBlocks.
empty() && !ExitsInLoops.
empty()) {
2000 assert(Worklist.
empty() &&
"Didn't clear worklist!");
2001 assert(NewExitLoopBlocks.empty() &&
"Didn't clear loop set!");
2006 assert(ExitL.
contains(&L) &&
"Exit loop must contain the inner loop!");
2012 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->
getParentLoop())
2013 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2027 if (!UnloopedBlocks.
erase(PredBB)) {
2028 assert((NewExitLoopBlocks.count(PredBB) ||
2030 "Predecessor not in a nested loop (or already visited)!");
2037 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2039 assert(Inserted &&
"Should only visit an unlooped block once!");
2044 }
while (!Worklist.
empty());
2049 for (
auto *BB : NewExitLoopBlocks)
2051 if (BBL == &L || !L.contains(BBL))
2056 NewExitLoopBlocks.clear();
2062 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2063 for (
auto *BB : UnloopedBlocks)
2065 if (BBL == &L || !L.contains(BBL))
2071 auto &SubLoops = L.getSubLoopsVector();
2072 auto SubLoopsSplitI =
2073 LoopBlockSet.empty()
2075 : std::stable_partition(
2076 SubLoops.begin(), SubLoops.end(), [&](
Loop *SubL) {
2077 return LoopBlockSet.count(SubL->getHeader());
2079 for (
auto *HoistedL :
make_range(SubLoopsSplitI, SubLoops.end())) {
2081 HoistedL->setParentLoop(
nullptr);
2091 if (
auto *NewParentL = LI.
getLoopFor(HoistedL->getLoopPreheader()))
2092 NewParentL->addChildLoop(HoistedL);
2096 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2100 assert(SubLoops.empty() &&
2101 "Failed to remove all subloops from the original loop!");
2102 if (
Loop *ParentL = L.getParentLoop())
2120template <
typename CallableT>
2132 if (!Callable(
N->getBlock()))
2138 "Cannot visit a node twice when walking a tree!");
2141 }
while (!DomWorklist.
empty());
2145 bool CurrentLoopValid,
bool PartiallyInvariant,
2148 if (!NewLoops.
empty())
2149 U.addSiblingLoops(NewLoops);
2153 if (CurrentLoopValid) {
2154 if (PartiallyInvariant) {
2157 auto &
Context = L.getHeader()->getContext();
2162 Context, L.getLoopID(), {
"llvm.loop.unswitch.partial"},
2163 {DisableUnswitchMD});
2164 L.setLoopID(NewLoopID);
2165 }
else if (InjectedCondition) {
2167 auto &
Context = L.getHeader()->getContext();
2172 Context, L.getLoopID(), {
"llvm.loop.unswitch.injection"},
2173 {DisableUnswitchMD});
2174 L.setLoopID(NewLoopID);
2176 U.revisitCurrentLoop();
2178 U.markLoopAsDeleted(L, LoopName);
2185 LPMUpdater &LoopUpdater,
bool InsertFreeze,
bool InjectedCondition) {
2188 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2192 std::string LoopName(L.getName());
2198 "Can only unswitch switches and conditional branch!");
2202 !PartiallyInvariant);
2205 "Cannot have other invariants with full unswitching!");
2208 "Partial unswitching requires an instruction as the condition!");
2221 if (!FullUnswitch) {
2225 PartiallyInvariant) &&
2226 "Only `or`, `and`, an `select`, partially invariant instructions "
2227 "can combine invariants being unswitched.");
2238 BI ? BI->
getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2243 for (
auto Case : SI->cases())
2244 if (Case.getCaseSuccessor() != RetainedSuccBB)
2245 UnswitchedSuccBBs.
insert(Case.getCaseSuccessor());
2247 assert(!UnswitchedSuccBBs.
count(RetainedSuccBB) &&
2248 "Should not unswitch the same successor we are retaining!");
2257 Loop *ParentL = L.getParentLoop();
2266 Loop *OuterExitL = &L;
2268 L.getUniqueExitBlocks(ExitBlocks);
2269 for (
auto *ExitBB : ExitBlocks) {
2273 if (!NewOuterExitL) {
2275 OuterExitL =
nullptr;
2278 if (NewOuterExitL != OuterExitL && NewOuterExitL->
contains(OuterExitL))
2279 OuterExitL = NewOuterExitL;
2299 for (
auto *SuccBB : llvm::concat<BasicBlock *const>(
ArrayRef(RetainedSuccBB),
2301 if (SuccBB->getUniquePredecessor() ||
2303 return PredBB == ParentBB || DT.
dominates(SuccBB, PredBB);
2306 DominatingSucc[BB] = SuccBB;
2325 for (
auto *SuccBB : UnswitchedSuccBBs) {
2328 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2329 DominatingSucc, *VMaps.
back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2334 if (TI.
getMetadata(LLVMContext::MD_make_implicit)) {
2338 TI.
setMetadata(LLVMContext::MD_make_implicit,
nullptr);
2345 TI.
setMetadata(LLVMContext::MD_make_implicit,
nullptr);
2352 SplitBB->getTerminator()->eraseFromParent();
2356 NewTI->
insertInto(ParentBB, ParentBB->end());
2377 DTUpdates.
push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2379 assert(SI &&
"Must either be a branch or switch!");
2382 assert(SI->getDefaultDest() == RetainedSuccBB &&
2383 "Not retaining default successor!");
2384 SI->setDefaultDest(LoopPH);
2385 for (
const auto &Case : SI->cases())
2386 if (Case.getCaseSuccessor() == RetainedSuccBB)
2387 Case.setSuccessor(LoopPH);
2389 Case.setSuccessor(ClonedPHs.
find(Case.getCaseSuccessor())->second);
2392 SI->setCondition(
new FreezeInst(SI->getCondition(),
2393 SI->getCondition()->getName() +
".fr",
2394 SI->getIterator()));
2401 {DominatorTree::Insert, SplitBB, ClonedPHs.
find(SuccBB)->second});
2415 for (
auto &VMap : VMaps)
2431 "Only one possible unswitched block for a branch!");
2435 DTUpdates.
push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2445 "Not retaining default successor!");
2446 for (
const auto &Case : NewSI->
cases())
2447 Case.getCaseSuccessor()->removePredecessor(
2455 DTUpdates.
push_back({DominatorTree::Delete, ParentBB, SuccBB});
2466 assert(BI &&
"Only branches have partial unswitching.");
2468 "Only one possible unswitched block for a branch!");
2472 if (PartiallyInvariant)
2474 *SplitBB, Invariants,
Direction, *ClonedPH, *LoopPH, L, MSSAU);
2477 *SplitBB, Invariants,
Direction, *ClonedPH, *LoopPH,
2480 DTUpdates.
push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2487 for (
auto &VMap : VMaps)
2507 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2530 assert(DT.
verify(DominatorTree::VerificationLevel::Fast));
2532 if (BI && !PartiallyInvariant) {
2538 "Only one possible unswitched block for a branch!");
2550 bool ReplaceUnswitched =
2551 FullUnswitch || (Invariants.
size() == 1) || PartiallyInvariant;
2559 for (
Value *Invariant : Invariants) {
2560 assert(!isa<Constant>(Invariant) &&
2561 "Should not be replacing constant values!");
2564 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2571 U.set(ContinueReplacement);
2572 else if (ReplaceUnswitched &&
2574 U.set(UnswitchedReplacement);
2591 auto UpdateLoop = [&](
Loop &UpdateL) {
2593 UpdateL.verifyLoop();
2594 for (
Loop *ChildL : UpdateL) {
2595 ChildL->verifyLoop();
2596 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2597 "Perturbed a child loop's LCSSA form!");
2617 for (
Loop *UpdatedL :
2618 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2619 UpdateLoop(*UpdatedL);
2620 if (UpdatedL->isOutermost())
2621 OuterExitL =
nullptr;
2625 if (L.isOutermost())
2626 OuterExitL =
nullptr;
2631 if (OuterExitL != &L)
2632 for (
Loop *OuterL = ParentL; OuterL != OuterExitL;
2634 UpdateLoop(*OuterL);
2646 for (
Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2647 if (UpdatedL->getParentLoop() == ParentL)
2649 postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,
2650 InjectedCondition, SibLoops);
2673 auto BBCostIt = BBCostMap.
find(
N.getBlock());
2674 if (BBCostIt == BBCostMap.
end())
2678 auto DTCostIt = DTCostMap.
find(&
N);
2679 if (DTCostIt != DTCostMap.
end())
2680 return DTCostIt->second;
2685 N.begin(),
N.end(), BBCostIt->second,
2687 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2689 bool Inserted = DTCostMap.
insert({&
N,
Cost}).second;
2691 assert(Inserted &&
"Should not insert a node while visiting children!");
2726 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2728 BasicBlock *ThenBB = CondBr->getSuccessor(0),
2729 *TailBB = CondBr->getSuccessor(1);
2734 PHINode::Create(SI->getType(), 2,
"unswitched.select", SI->getIterator());
2735 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2736 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2737 Phi->setDebugLoc(SI->getDebugLoc());
2738 SI->replaceAllUsesWith(Phi);
2739 SI->eraseFromParent();
2781 GI->
getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2788 GuardedBlock->
setName(
"guarded");
2839 return L.contains(SuccBB);
2841 NumCostMultiplierSkipped++;
2845 auto *ParentL = L.getParentLoop();
2846 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2847 : std::distance(LI.
begin(), LI.
end()));
2851 int UnswitchedClones = 0;
2852 for (
const auto &Candidate : UnswitchCandidates) {
2855 bool SkipExitingSuccessors = DT.
dominates(CondBlock, Latch);
2856 if (isa<SelectInst>(CI)) {
2861 if (!SkipExitingSuccessors)
2865 int NonExitingSuccessors =
2867 [SkipExitingSuccessors, &L](
const BasicBlock *SuccBB) {
2868 return !SkipExitingSuccessors || L.contains(SuccBB);
2870 UnswitchedClones +=
Log2_32(NonExitingSuccessors);
2878 unsigned ClonesPower =
2882 int SiblingsMultiplier =
2883 std::max((ParentL ? SiblingsCount
2893 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2897 <<
" (siblings " << SiblingsMultiplier <<
" * clones "
2898 << (1 << ClonesPower) <<
")"
2899 <<
" for unswitch candidate: " << TI <<
"\n");
2900 return CostMultiplier;
2908 assert(UnswitchCandidates.
empty() &&
"Should be!");
2912 if (isa<Constant>(
Cond))
2914 if (L.isLoopInvariant(
Cond)) {
2922 if (!Invariants.
empty())
2923 UnswitchCandidates.
push_back({
I, std::move(Invariants)});
2928 bool CollectGuards =
false;
2931 L.getHeader()->getParent()->getParent(), Intrinsic::experimental_guard);
2932 if (GuardDecl && !GuardDecl->use_empty())
2933 CollectGuards =
true;
2936 for (
auto *BB : L.blocks()) {
2940 for (
auto &
I : *BB) {
2941 if (
auto *SI = dyn_cast<SelectInst>(&
I)) {
2942 auto *
Cond = SI->getCondition();
2944 if (
Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
2945 AddUnswitchCandidatesForInst(SI,
Cond);
2946 }
else if (CollectGuards &&
isGuard(&
I)) {
2950 if (!isa<Constant>(
Cond) && L.isLoopInvariant(
Cond))
2955 if (
auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2958 if (!isa<Constant>(SI->getCondition()) &&
2959 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2960 UnswitchCandidates.
push_back({SI, {SI->getCondition()}});
2964 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2965 if (!BI || !BI->isConditional() ||
2966 BI->getSuccessor(0) == BI->getSuccessor(1))
2969 AddUnswitchCandidatesForInst(BI, BI->getCondition());
2973 !
any_of(UnswitchCandidates, [&L](
auto &TerminatorAndInvariants) {
2974 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2979 dbgs() <<
"simple-loop-unswitch: Found partially invariant condition "
2980 << *
Info->InstToDuplicate[0] <<
"\n");
2981 PartialIVInfo = *
Info;
2982 PartialIVCondBranch = L.getHeader()->getTerminator();
2986 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2989 return !UnswitchCandidates.
empty();
3004 if (!L.contains(IfTrue)) {
3005 Pred = ICmpInst::getInversePredicate(Pred);
3010 if (L.isLoopInvariant(
LHS)) {
3011 Pred = ICmpInst::getSwappedPredicate(Pred);
3017 Pred = ICmpInst::ICMP_ULT;
3018 RHS = ConstantInt::get(
3030 if (L.isLoopInvariant(
LHS) || !L.isLoopInvariant(
RHS))
3033 if (Pred != ICmpInst::ICMP_ULT)
3036 if (!L.contains(IfTrue) || L.contains(IfFalse))
3040 if (L.getHeader() == IfTrue)
3057 assert(Weights.
size() == 2 &&
"Unexpected profile data!");
3059 auto Num = Weights[
Idx];
3060 auto Denom = Weights[0] + Weights[1];
3062 if (Denom == 0 || Num > Denom)
3065 if (LikelyTaken > ActualTaken)
3088static NonTrivialUnswitchCandidate
3092 assert(Candidate.hasPendingInjection() &&
"Nothing to inject!");
3093 BasicBlock *Preheader = L.getLoopPreheader();
3094 assert(Preheader &&
"Loop is not in simplified form?");
3096 "Unswitching branch of inner loop!");
3098 auto Pred = Candidate.PendingInjection->Pred;
3099 auto *
LHS = Candidate.PendingInjection->LHS;
3100 auto *
RHS = Candidate.PendingInjection->RHS;
3101 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3102 auto *TI = cast<BranchInst>(Candidate.TI);
3103 auto *BB = Candidate.TI->getParent();
3104 auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3105 : TI->getSuccessor(0);
3107 assert(L.contains(InLoopSucc) &&
"Not supported yet!");
3108 assert(!L.contains(OutOfLoopSucc) &&
"Not supported yet!");
3109 auto &Ctx = BB->getContext();
3112 assert(ICmpInst::isUnsigned(Pred) &&
"Not supported yet!");
3122 auto *InjectedCond =
3123 ICmpInst::Create(Instruction::ICmp, Pred,
LHS,
RHS,
"injected.cond",
3127 BB->getParent(), InLoopSucc);
3130 Builder.
CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3133 Builder.
CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
3134 TI->getSuccessor(1));
3138 for (
auto &
I : *InLoopSucc) {
3139 auto *PN = dyn_cast<PHINode>(&
I);
3142 auto *Inc = PN->getIncomingValueForBlock(BB);
3143 PN->addIncoming(Inc, CheckBlock);
3145 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3148 { DominatorTree::Insert, BB, CheckBlock },
3149 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3150 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3151 { DominatorTree::Delete, BB, OutOfLoopSucc }
3157 L.addBasicBlockToLoop(CheckBlock, LI);
3169 LLVM_DEBUG(
dbgs() <<
"Injected a new loop-invariant branch " << *InvariantBr
3170 <<
" and considering it for unswitching.");
3171 ++NumInvariantConditionsInjected;
3172 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3193 assert(ICmpInst::isStrictPredicate(Pred));
3194 if (Compares.
size() < 2)
3197 for (
auto Prev = Compares.
begin(), Next = Compares.
begin() + 1;
3198 Next != Compares.
end(); ++Prev, ++Next) {
3202 InjectedInvariant ToInject(NonStrictPred,
LHS,
RHS, InLoopSucc);
3203 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3204 std::nullopt, std::move(ToInject));
3205 UnswitchCandidates.
push_back(std::move(Candidate));
3235 auto *Latch = L.getLoopLatch();
3239 assert(L.getLoopPreheader() &&
"Must have a preheader!");
3244 for (
auto *DTN = DT.
getNode(Latch); L.contains(DTN->getBlock());
3245 DTN = DTN->getIDom()) {
3248 BasicBlock *IfTrue =
nullptr, *IfFalse =
nullptr;
3249 auto *BB = DTN->getBlock();
3253 auto *Term = BB->getTerminator();
3267 CompareDesc
Desc(cast<BranchInst>(Term),
RHS, IfTrue);
3268 while (
auto *Zext = dyn_cast<ZExtInst>(
LHS))
3269 LHS = Zext->getOperand(0);
3270 CandidatesULT[
LHS].push_back(
Desc);
3274 for (
auto &It : CandidatesULT)
3276 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3281 if (!L.isSafeToClone())
3283 for (
auto *BB : L.blocks())
3284 for (
auto &
I : *BB) {
3285 if (
I.getType()->isTokenTy() &&
I.isUsedOutsideOfBlock(BB))
3287 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
3288 assert(!CB->cannotDuplicate() &&
"Checked by L.isSafeToClone().");
3289 if (CB->isConvergent())
3302 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
3306 L.getUniqueExitBlocks(ExitBlocks);
3311 for (
auto *ExitBB : ExitBlocks) {
3312 auto It = ExitBB->getFirstNonPHIIt();
3313 if (isa<CleanupPadInst>(It) || isa<CatchSwitchInst>(It)) {
3314 LLVM_DEBUG(
dbgs() <<
"Cannot unswitch because of cleanuppad/catchswitch "
3342 L.getHeader()->getParent()->hasMinSize()
3346 for (
auto *BB : L.blocks()) {
3348 for (
auto &
I : *BB) {
3353 assert(
Cost >= 0 &&
"Must not have negative costs!");
3355 assert(LoopCost >= 0 &&
"Must not have negative loop costs!");
3356 BBCostMap[BB] =
Cost;
3380 if (isa<SelectInst>(TI))
3389 if (!Visited.
insert(SuccBB).second)
3397 if (!FullUnswitch) {
3398 auto &BI = cast<BranchInst>(TI);
3401 if (SuccBB == BI.getSuccessor(1))
3404 if (SuccBB == BI.getSuccessor(0))
3407 SuccBB == BI.getSuccessor(0)) ||
3409 SuccBB == BI.getSuccessor(1)))
3417 if (SuccBB->getUniquePredecessor() ||
3419 return PredBB == &BB || DT.
dominates(SuccBB, PredBB);
3423 "Non-duplicated cost should never exceed total loop cost!");
3432 int SuccessorsCount =
isGuard(&TI) ? 2 : Visited.
size();
3433 assert(SuccessorsCount > 1 &&
3434 "Cannot unswitch a condition without multiple distinct successors!");
3435 return (LoopCost -
Cost) * (SuccessorsCount - 1);
3438 std::optional<NonTrivialUnswitchCandidate> Best;
3439 for (
auto &Candidate : UnswitchCandidates) {
3444 !BI || Candidate.hasPendingInjection() ||
3445 (Invariants.
size() == 1 &&
3447 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3451 int CostMultiplier =
3455 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3456 CandidateCost *= CostMultiplier;
3458 <<
" (multiplier: " << CostMultiplier <<
")"
3459 <<
" for unswitch candidate: " << TI <<
"\n");
3462 <<
" for unswitch candidate: " << TI <<
"\n");
3465 if (!Best || CandidateCost < Best->
Cost) {
3467 Best->Cost = CandidateCost;
3470 assert(Best &&
"Must be!");
3482 assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI));
3492 if (
BranchInst *BI = dyn_cast<BranchInst>(&TI))
3497 Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3511 PartialIVCondBranch, L, LI, AA, MSSAU);
3514 PartialIVCondBranch, L, DT, LI, AA,
3517 if (UnswitchCandidates.
empty())
3521 dbgs() <<
"Considering " << UnswitchCandidates.
size()
3522 <<
" non-trivial loop invariant conditions for unswitching.\n");
3525 UnswitchCandidates, L, DT, LI, AC,
TTI, PartialIVInfo);
3527 assert(Best.TI &&
"Failed to find loop unswitch candidate");
3528 assert(Best.Cost &&
"Failed to compute cost");
3531 LLVM_DEBUG(
dbgs() <<
"Cannot unswitch, lowest cost found: " << *Best.Cost
3536 bool InjectedCondition =
false;
3537 if (Best.hasPendingInjection()) {
3539 InjectedCondition =
true;
3541 assert(!Best.hasPendingInjection() &&
3542 "All injections should have been done by now!");
3544 if (Best.TI != PartialIVCondBranch)
3548 if (
auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3554 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3564 LLVM_DEBUG(
dbgs() <<
" Unswitching non-trivial (cost = " << Best.Cost
3565 <<
") terminator: " << *Best.TI <<
"\n");
3567 LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,
3599 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3600 "Loops must be in LCSSA form before unswitching.");
3603 if (!L.isLoopSimplifyForm())
3616 const Function *
F = L.getHeader()->getParent();
3629 bool ContinueWithNonTrivial =
3631 if (!ContinueWithNonTrivial)
3635 if (
F->hasOptSize())
3640 auto IsLoopNestCold = [&](
const Loop *L) {
3646 Parent = Parent->getParentLoop();
3651 while (!Worklist.
empty()) {
3689 Function &
F = *L.getHeader()->getParent();
3692 if (
auto OuterProxy =
3696 LLVM_DEBUG(
dbgs() <<
"Unswitching loop in " <<
F.getName() <<
": " << L
3699 std::optional<MemorySSAUpdater> MSSAU;
3706 &AR.
SE, MSSAU ? &*MSSAU :
nullptr, PSI, AR.
BFI, U))
3725 OS, MapClassName2PassName);
3728 OS << (NonTrivial ?
"" :
"no-") <<
"nontrivial;";
3729 OS << (Trivial ?
"" :
"no-") <<
"trivial";
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
Module.h This file contains the declarations for the Module class.
This defines the Use class.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
uint64_t IntrinsicInst * II
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater)
Unswitch control flow predicated on loop invariant conditions.
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static BranchInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)
Turns a select instruction into implicit control flow branch, making the following replacement:
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, bool CurrentLoopValid, bool PartiallyInvariant, bool InjectedCondition, ArrayRef< Loop * > NewLoops)
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
static Value * skipTrivialSelect(Value *Cond)
static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
static void canonicalizeForInvariantConditionInjection(CmpPredicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition)
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, LPMUpdater &LoopUpdater)
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A private abstract base class describing the concept of an individual alias analysis implementation.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
static DebugLoc getCompilerGenerated()
static DebugLoc getDropped()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
Value * CreateFreeze(Value *V, const Twine &Name="")
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void dropLocation()
Drop the instruction's debug location.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Represents a single loop in the control flow graph.
StringRef getName() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
An analysis that produces MemorySSA for a function.
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
LLVM_ABI void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
LLVM_ABI void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
LLVM_ABI void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
LLVM_ABI void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
LLVM_ABI void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
LLVM_ABI MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point, bool CreationMustSucceed=true)
Create a MemoryAccess in MemorySSA at a specified point in a block.
LLVM_ABI void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
LLVM_ABI void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
LLVM_ABI void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
LLVM_ABI void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Encapsulates MemorySSA, including all data associated with memory accesses.
LLVM_ABI void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
A Module instance is used to store all the information related to an LLVM module.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
The main scalar evolution driver.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LLVM_ABI void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI Instruction::InstListType::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
BasicBlock * getDefaultDest() const
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
void push_back(EltTy NewVal)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
bool isIntegerTy() const
True if this is an instance of IntegerType.
LLVM_ABI unsigned getIntegerBitWidth() const
A Use represents the edge between a Value definition and its users.
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
void sort(IteratorTy Start, IteratorTy End)
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
LLVM_ABI bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
LLVM_ABI llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
auto predecessors(const MachineBasicBlock *BB)
bool pred_empty(const BasicBlock *BB)
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
LLVM_ABI std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
LLVM_ABI bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A special type used by analysis passes to provide an address that identifies that particular analysis...
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Description of the encoding of one expression Op.
Struct to hold information about a partially invariant condition.
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Direction
An enum for the direction of the loop.
A CRTP mix-in to automatically provide informational APIs needed for passes.