53#define DEBUG_TYPE "loop-interchange"
55STATISTIC(LoopsInterchanged,
"Number of loops interchanged");
59 cl::desc(
"Interchange if you gain more than this number"));
65 "Maximum number of load-store instructions that should be handled "
66 "in the dependency matrix. Higher value may lead to more interchanges "
67 "at the cost of compile-time"));
81using CharMatrix = std::vector<std::vector<char>>;
96 cl::desc(
"Minimum depth of loop nest considered for the transform"));
101 cl::desc(
"Maximum depth of loop nest considered for the transform"));
107 cl::desc(
"List of profitability heuristics to be used. They are applied in "
109 cl::list_init<RuleTy>({RuleTy::PerLoopCacheAnalysis,
110 RuleTy::PerInstrOrderCost,
111 RuleTy::ForVectorization}),
113 "Prioritize loop cache cost"),
114 clEnumValN(RuleTy::PerInstrOrderCost,
"instorder",
115 "Prioritize the IVs order of each instruction"),
116 clEnumValN(RuleTy::ForVectorization,
"vectorize",
117 "Prioritize vectorization"),
119 "Ignore profitability, force interchange (does not "
120 "work with other options)")));
125 for (RuleTy Rule : Rules) {
126 if (!Set.insert(Rule).second)
128 if (Rule == RuleTy::Ignore)
135 for (
auto &Row : DepMatrix) {
148 assert(Src->getParent() == Dst->getParent() && Src != Dst &&
149 "Expected Src and Dst to be different instructions in the same BB");
151 bool FoundSrc =
false;
171 ValueVector MemInstr;
177 if (!isa<Instruction>(
I))
179 if (
auto *Ld = dyn_cast<LoadInst>(&
I)) {
183 }
else if (
auto *St = dyn_cast<StoreInst>(&
I)) {
186 MemInstr.push_back(&
I);
192 <<
" Loads and Stores to analyze\n");
198 L->getStartLoc(), L->getHeader())
199 <<
"Number of loads/stores exceeded, the supported maximum "
200 "can be increased with option "
201 "-loop-interchange-maxmeminstr-count.";
205 ValueVector::iterator
I, IE, J, JE;
211 for (
I = MemInstr.begin(), IE = MemInstr.end();
I != IE; ++
I) {
212 for (J =
I, JE = MemInstr.end(); J != JE; ++J) {
213 std::vector<char> Dep;
217 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
220 if (
auto D = DI->
depends(Src, Dst)) {
221 assert(
D->isOrdered() &&
"Expected an output, flow or anti dep.");
224 if (
D->normalize(SE))
227 D->isFlow() ?
"flow" :
D->isAnti() ?
"anti" :
"output";
228 dbgs() <<
"Found " << DepType
229 <<
" dependency between Src and Dst\n"
230 <<
" Src:" << *Src <<
"\n Dst:" << *Dst <<
'\n');
231 unsigned Levels =
D->getLevels();
233 for (
unsigned II = 1;
II <= Levels; ++
II) {
240 unsigned Dir =
D->getDirection(
II);
254 if (
D->isConfused()) {
255 assert(Dep.empty() &&
"Expected empty dependency vector");
256 Dep.assign(Level,
'*');
259 while (Dep.size() != Level) {
264 bool IsKnownForward =
true;
265 if (Src->getParent() != Dst->getParent()) {
269 IsKnownForward =
false;
275 "Unexpected instructions");
280 bool IsReversed =
D->getSrc() != Src;
282 IsKnownForward =
false;
298 DepMatrix.push_back(Dep);
305 DepMatrix[Ite->second].back() =
'*';
317 for (
auto &Row : DepMatrix)
326static std::optional<bool>
339 unsigned InnerLoopId,
340 unsigned OuterLoopId) {
341 unsigned NumRows = DepMatrix.size();
342 std::vector<char> Cur;
344 for (
unsigned Row = 0; Row < NumRows; ++Row) {
347 Cur = DepMatrix[Row];
360 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
369 << L.getHeader()->getParent()->getName() <<
" Loop: %"
370 << L.getHeader()->getName() <<
'\n');
371 assert(LoopList.empty() &&
"LoopList should initially be empty!");
372 Loop *CurrentLoop = &L;
373 const std::vector<Loop *> *Vec = &CurrentLoop->
getSubLoops();
374 while (!Vec->empty()) {
378 if (Vec->size() != 1) {
383 LoopList.push_back(CurrentLoop);
384 CurrentLoop = Vec->front();
387 LoopList.push_back(CurrentLoop);
392 unsigned LoopNestDepth = LoopList.
size();
394 LLVM_DEBUG(
dbgs() <<
"Unsupported depth of loop nest " << LoopNestDepth
402 <<
"Unsupported depth of loop nest, the supported range is ["
413 for (
Loop *L : LoopList) {
415 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
419 if (L->getNumBackEdges() != 1) {
423 if (!L->getExitingBlock()) {
434class LoopInterchangeLegality {
438 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
441 bool canInterchangeLoops(
unsigned InnerLoopId,
unsigned OuterLoopId,
442 CharMatrix &DepMatrix);
450 bool isLoopStructureUnderstood();
452 bool currentLimitations();
455 return OuterInnerReductions;
459 return InnerLoopInductions;
463 return HasNoWrapReductions;
467 bool tightlyNested(
Loop *Outer,
Loop *Inner);
468 bool containsUnsafeInstructions(
BasicBlock *BB);
474 bool findInductionAndReductions(
Loop *L,
502class CacheCostManager {
509 std::optional<std::unique_ptr<CacheCost>> CC;
515 void computeIfUnitinialized();
520 : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
527class LoopInterchangeProfitability {
531 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
535 unsigned InnerLoopId,
unsigned OuterLoopId,
536 CharMatrix &DepMatrix, CacheCostManager &CCM);
539 int getInstrOrderCost();
540 std::optional<bool> isProfitablePerLoopCacheAnalysis(
542 std::optional<bool> isProfitablePerInstrOrderCost();
543 std::optional<bool> isProfitableForVectorization(
unsigned InnerLoopId,
544 unsigned OuterLoopId,
545 CharMatrix &DepMatrix);
557class LoopInterchangeTransform {
561 const LoopInterchangeLegality &LIL)
562 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
566 void restructureLoops(
Loop *NewInner,
Loop *NewOuter,
569 void removeChildLoop(
Loop *OuterLoop,
Loop *InnerLoop);
572 bool adjustLoopLinks();
573 bool adjustLoopBranches();
584 const LoopInterchangeLegality &LIL;
587struct LoopInterchange {
600 : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE) {}
603 if (
L->getParentLoop())
607 return processLoopList(LoopList);
612 for (
unsigned I = 1;
I < LoopList.size(); ++
I)
613 if (LoopList[
I]->getParentLoop() != LoopList[
I - 1])
615 return processLoopList(LoopList);
621 return LoopList.
size() - 1;
625 bool Changed =
false;
629 "Unsupported depth of loop nest.");
631 unsigned LoopNestDepth = LoopList.
size();
633 LLVM_DEBUG(
dbgs() <<
"Processing LoopList of size = " << LoopNestDepth
636 CharMatrix DependencyMatrix;
637 Loop *OuterMostLoop = *(LoopList.
begin());
639 OuterMostLoop, DI, SE, ORE)) {
654 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
655 CacheCostManager CCM(LoopList[0], AR, DI);
660 for (
unsigned j = SelecLoopId;
j > 0;
j--) {
661 bool ChangedPerIter =
false;
662 for (
unsigned i = SelecLoopId; i > SelecLoopId -
j; i--) {
664 processLoop(LoopList, i, i - 1, DependencyMatrix, CCM);
665 ChangedPerIter |= Interchanged;
666 Changed |= Interchanged;
677 unsigned OuterLoopId,
678 std::vector<std::vector<char>> &DependencyMatrix,
679 CacheCostManager &CCM) {
680 Loop *OuterLoop = LoopList[OuterLoopId];
681 Loop *InnerLoop = LoopList[InnerLoopId];
683 <<
" and OuterLoopId = " << OuterLoopId <<
"\n");
684 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
685 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
686 LLVM_DEBUG(
dbgs() <<
"Not interchanging loops. Cannot prove legality.\n");
690 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
691 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
692 DependencyMatrix, CCM)) {
701 <<
"Loop interchanged with enclosing loop.";
704 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
705 LIT.transform(LIL.getHasNoWrapReductions());
712 std::swap(LoopList[OuterLoopId], LoopList[InnerLoopId]);
725bool LoopInterchangeLegality::containsUnsafeInstructions(
BasicBlock *BB) {
727 return I.mayHaveSideEffects() ||
I.mayReadFromMemory();
731bool LoopInterchangeLegality::tightlyNested(
Loop *OuterLoop,
Loop *InnerLoop) {
743 if (!OuterLoopHeaderBI)
747 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->
getHeader() &&
748 Succ != OuterLoopLatch)
751 LLVM_DEBUG(
dbgs() <<
"Checking instructions in Loop header and Loop latch\n");
754 if (containsUnsafeInstructions(OuterLoopHeader) ||
755 containsUnsafeInstructions(OuterLoopLatch))
761 if (InnerLoopPreHeader != OuterLoopHeader &&
762 containsUnsafeInstructions(InnerLoopPreHeader))
770 if (&SuccInner != OuterLoopLatch) {
772 <<
" does not lead to the outer loop latch.\n";);
778 if (containsUnsafeInstructions(InnerLoopExit))
786bool LoopInterchangeLegality::isLoopStructureUnderstood() {
788 for (
PHINode *InnerInduction : InnerLoopInductions) {
789 unsigned Num = InnerInduction->getNumOperands();
790 for (
unsigned i = 0; i < Num; ++i) {
791 Value *Val = InnerInduction->getOperand(i);
792 if (isa<Constant>(Val))
801 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
802 InnerLoopPreheader &&
822 Value *Op0 = InnerLoopCmp->getOperand(0);
823 Value *Op1 = InnerLoopCmp->getOperand(1);
834 std::function<
bool(
Value *)> IsPathToInnerIndVar;
835 IsPathToInnerIndVar = [
this, &IsPathToInnerIndVar](
const Value *
V) ->
bool {
838 if (isa<Constant>(V))
843 if (isa<CastInst>(
I))
844 return IsPathToInnerIndVar(
I->getOperand(0));
845 if (isa<BinaryOperator>(
I))
846 return IsPathToInnerIndVar(
I->getOperand(0)) &&
847 IsPathToInnerIndVar(
I->getOperand(1));
853 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
858 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
861 }
else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
884 if (
PHI->getNumIncomingValues() != 1)
894 if (isa<Constant>(V))
899 if (
PHI->getNumIncomingValues() == 1)
912 case RecurKind::SMin:
913 case RecurKind::SMax:
914 case RecurKind::UMin:
915 case RecurKind::UMax:
916 case RecurKind::FAdd:
917 case RecurKind::FMul:
918 case RecurKind::FMin:
919 case RecurKind::FMax:
920 case RecurKind::FMinimum:
921 case RecurKind::FMaximum:
922 case RecurKind::FMinimumNum:
923 case RecurKind::FMaximumNum:
924 case RecurKind::FMulAdd:
925 case RecurKind::AnyOf:
942 case RecurKind::Mul: {
951 assert(
I->getOpcode() == OpCode &&
952 "Expected the instruction to be the reduction operation");
957 if (
I->hasNoSignedWrap() ||
I->hasNoUnsignedWrap())
974bool LoopInterchangeLegality::findInductionAndReductions(
976 if (!
L->getLoopLatch() || !
L->getLoopPredecessor())
986 if (!OuterInnerReductions.count(&
PHI)) {
988 "across the outer loop.\n");
992 assert(
PHI.getNumIncomingValues() == 2 &&
993 "Phis in loop header should have exactly 2 incoming values");
1003 <<
"Failed to recognize PHI as an induction or reduction.\n");
1006 OuterInnerReductions.insert(&
PHI);
1007 OuterInnerReductions.insert(InnerRedPhi);
1016bool LoopInterchangeLegality::currentLimitations() {
1026 dbgs() <<
"Loops where the latch is not the exiting block are not"
1027 <<
" supported currently.\n");
1032 <<
"Loops where the latch is not the exiting block cannot be"
1033 " interchange currently.";
1039 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
1041 dbgs() <<
"Only outer loops with induction or reduction PHI nodes "
1042 <<
"are supported currently.\n");
1047 <<
"Only outer loops with induction or reduction PHI nodes can be"
1048 " interchanged currently.";
1057 Loop *CurLevelLoop = OuterLoop;
1060 CurLevelLoop = CurLevelLoop->
getSubLoops().front();
1061 if (!findInductionAndReductions(CurLevelLoop, Inductions,
nullptr)) {
1063 dbgs() <<
"Only inner loops with induction or reduction PHI nodes "
1064 <<
"are supported currently.\n");
1069 <<
"Only inner loops with induction or reduction PHI nodes can be"
1070 " interchange currently.";
1077 if (!isLoopStructureUnderstood()) {
1083 <<
"Inner loop structure not understood currently.";
1091bool LoopInterchangeLegality::findInductions(
1098 return !Inductions.
empty();
1110 if (
PHI.getNumIncomingValues() > 1)
1113 PHINode *PN = dyn_cast<PHINode>(U);
1115 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
1180 for (
auto *U :
PHI.users()) {
1189bool LoopInterchangeLegality::canInterchangeLoops(
unsigned InnerLoopId,
1190 unsigned OuterLoopId,
1191 CharMatrix &DepMatrix) {
1193 LLVM_DEBUG(
dbgs() <<
"Failed interchange InnerLoopId = " << InnerLoopId
1194 <<
" and OuterLoopId = " << OuterLoopId
1195 <<
" due to dependence\n");
1200 <<
"Cannot interchange loops due to dependences.";
1205 for (
auto *BB : OuterLoop->
blocks())
1207 if (
CallInst *CI = dyn_cast<CallInst>(&
I)) {
1209 if (CI->onlyWritesMemory())
1212 dbgs() <<
"Loops with call instructions cannot be interchanged "
1218 <<
"Cannot interchange loops due to call instruction.";
1224 if (!findInductions(InnerLoop, InnerLoopInductions)) {
1225 LLVM_DEBUG(
dbgs() <<
"Could not find inner loop induction variables.\n");
1230 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop latch.\n");
1235 <<
"Cannot interchange loops because unsupported PHI nodes found "
1236 "in inner loop latch.";
1243 if (currentLimitations()) {
1244 LLVM_DEBUG(
dbgs() <<
"Not legal because of current transform limitation\n");
1249 if (!tightlyNested(OuterLoop, InnerLoop)) {
1255 <<
"Cannot interchange loops because they are not tightly "
1262 OuterInnerReductions)) {
1263 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop exit.\n");
1268 <<
"Found unsupported PHI node in loop exit.";
1274 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in outer loop exit.\n");
1279 <<
"Found unsupported PHI node in loop exit.";
1287void CacheCostManager::computeIfUnitinialized() {
1306CacheCost *CacheCostManager::getCacheCost() {
1307 computeIfUnitinialized();
1312 computeIfUnitinialized();
1316int LoopInterchangeProfitability::getInstrOrderCost() {
1317 unsigned GoodOrder, BadOrder;
1318 BadOrder = GoodOrder = 0;
1322 bool FoundInnerInduction =
false;
1323 bool FoundOuterInduction =
false;
1339 if (AR->
getLoop() == InnerLoop) {
1342 FoundInnerInduction =
true;
1343 if (FoundOuterInduction) {
1353 if (AR->
getLoop() == OuterLoop) {
1356 FoundOuterInduction =
true;
1357 if (FoundInnerInduction) {
1366 return GoodOrder - BadOrder;
1370LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1375 auto InnerLoopIt = CostMap.
find(InnerLoop);
1376 if (InnerLoopIt == CostMap.
end())
1377 return std::nullopt;
1378 auto OuterLoopIt = CostMap.
find(OuterLoop);
1379 if (OuterLoopIt == CostMap.
end())
1380 return std::nullopt;
1383 return std::nullopt;
1384 unsigned InnerIndex = InnerLoopIt->second;
1385 unsigned OuterIndex = OuterLoopIt->second;
1387 <<
", OuterIndex = " << OuterIndex <<
"\n");
1388 assert(InnerIndex != OuterIndex &&
"CostMap should assign unique "
1389 "numbers to each loop");
1390 return std::optional<bool>(InnerIndex < OuterIndex);
1394LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1398 int Cost = getInstrOrderCost();
1401 return std::optional<bool>(
true);
1403 return std::nullopt;
1408 for (
const auto &Dep : DepMatrix) {
1409 char Dir = Dep[LoopId];
1410 char DepType = Dep.back();
1411 assert((DepType ==
'<' || DepType ==
'*') &&
1412 "Unexpected element in dependency vector");
1415 if (Dir ==
'=' || Dir ==
'I')
1421 if (Dir ==
'<' && DepType ==
'<')
1430std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1431 unsigned InnerLoopId,
unsigned OuterLoopId, CharMatrix &DepMatrix) {
1447 return std::nullopt;
1450bool LoopInterchangeProfitability::isProfitable(
1451 const Loop *InnerLoop,
const Loop *OuterLoop,
unsigned InnerLoopId,
1452 unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
1458 "Duplicate rules and option 'ignore' are not allowed");
1468 std::optional<bool> shouldInterchange;
1471 case RuleTy::PerLoopCacheAnalysis: {
1474 shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
1477 case RuleTy::PerInstrOrderCost:
1478 shouldInterchange = isProfitablePerInstrOrderCost();
1480 case RuleTy::ForVectorization:
1482 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1484 case RuleTy::Ignore:
1491 if (shouldInterchange.has_value())
1495 if (!shouldInterchange.has_value()) {
1500 <<
"Insufficient information to calculate the cost of loop for "
1504 }
else if (!shouldInterchange.value()) {
1509 <<
"Interchanging loops is not considered to improve cache "
1510 "locality nor vectorization.";
1517void LoopInterchangeTransform::removeChildLoop(
Loop *OuterLoop,
1519 for (
Loop *L : *OuterLoop)
1520 if (L == InnerLoop) {
1521 OuterLoop->removeChildLoop(L);
1550void LoopInterchangeTransform::restructureLoops(
1560 if (OuterLoopParent) {
1562 removeChildLoop(OuterLoopParent, NewInner);
1563 removeChildLoop(NewInner, NewOuter);
1566 removeChildLoop(NewInner, NewOuter);
1591 if (BB == OuterHeader || BB == OuterLatch)
1606bool LoopInterchangeTransform::transform(
1608 bool Transformed =
false;
1613 auto &InductionPHIs = LIL.getInnerLoopInductions();
1614 if (InductionPHIs.empty()) {
1615 LLVM_DEBUG(
dbgs() <<
"Failed to find the point to split loop latch \n");
1620 for (
PHINode *CurInductionPHI : InductionPHIs) {
1621 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1623 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1626 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1639 auto MoveInstructions = [&i, &WorkList,
this, &InductionPHIs, NewLatch]() {
1640 for (; i < WorkList.
size(); i++) {
1646 "Moving instructions with side-effects may change behavior of "
1657 for (
Value *
Op : WorkList[i]->operands()) {
1675 for (
Instruction *InnerIndexVar : InnerIndexVarList)
1676 WorkList.
insert(cast<Instruction>(InnerIndexVar));
1694 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1695 if (InnerLoopPreHeader != OuterLoopHeader) {
1698 std::prev(InnerLoopPreHeader->
end()))))
1702 Transformed |= adjustLoopLinks();
1734 I->removeFromParent();
1749 std::vector<DominatorTree::UpdateType> &DTUpdates,
1750 bool MustUpdateOnce =
true) {
1752 "BI must jump to OldBB exactly once.");
1753 bool Changed =
false;
1761 DTUpdates.push_back(
1762 {DominatorTree::UpdateKind::Insert, BI->
getParent(), NewBB});
1763 DTUpdates.push_back(
1764 {DominatorTree::UpdateKind::Delete, BI->
getParent(), OldBB});
1766 assert(Changed &&
"Expected a successor to be updated");
1783 assert(
P.getNumIncomingValues() == 1 &&
1784 "Only loops with a single exit are supported!");
1787 auto IncI = cast<Instruction>(
P.getIncomingValueForBlock(InnerLatch));
1790 auto IncIInnerMost = cast<Instruction>(
followLCSSA(IncI));
1793 if (IncIInnerMost->getParent() != InnerLatch &&
1794 IncIInnerMost->
getParent() != InnerHeader)
1798 [OuterHeader, OuterExit, IncI, InnerHeader](
User *U) {
1799 return (cast<PHINode>(U)->getParent() == OuterHeader &&
1800 IncI->getParent() == InnerHeader) ||
1801 cast<PHINode>(U)->getParent() == OuterExit;
1803 "Can only replace phis iff the uses are in the loop nest exit or "
1804 "the incoming value is defined in the inner header (it will "
1805 "dominate all loop blocks after interchanging)");
1806 P.replaceAllUsesWith(IncI);
1807 P.eraseFromParent();
1835 if (
P.getNumIncomingValues() != 1)
1839 auto I = dyn_cast<Instruction>(
P.getIncomingValue(0));
1843 PHINode *NewPhi = dyn_cast<PHINode>(
P.clone());
1849 if (Pred == OuterLatch)
1854 P.setIncomingValue(0, NewPhi);
1864bool LoopInterchangeTransform::adjustLoopBranches() {
1866 std::vector<DominatorTree::UpdateType> DTUpdates;
1868 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1871 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1872 InnerLoopPreHeader != InnerLoop->
getHeader() && OuterLoopPreHeader &&
1873 InnerLoopPreHeader &&
"Guaranteed by loop-simplify form");
1878 if (isa<PHINode>(OuterLoopPreHeader->
begin()) ||
1880 OuterLoopPreHeader =
1882 if (InnerLoopPreHeader == OuterLoop->getHeader())
1883 InnerLoopPreHeader =
1888 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1890 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1906 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1907 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1912 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->
getTerminator());
1914 dyn_cast<BranchInst>(OuterLoopPredecessor->
getTerminator());
1916 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1919 if (!InnerLoopHeaderSuccessor)
1927 InnerLoopPreHeader, DTUpdates,
false);
1937 InnerLoopHeaderSuccessor, DTUpdates,
1945 OuterLoopPreHeader, DTUpdates);
1948 if (InnerLoopLatchBI->
getSuccessor(0) == InnerLoopHeader)
1949 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(1);
1951 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(0);
1954 InnerLoopLatchSuccessor, DTUpdates);
1956 if (OuterLoopLatchBI->
getSuccessor(0) == OuterLoopHeader)
1957 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(1);
1959 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(0);
1962 OuterLoopLatchSuccessor, DTUpdates);
1963 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1967 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1968 OuterLoopPreHeader);
1970 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1971 OuterLoopHeader, OuterLoopLatch, InnerLoop->
getExitBlock(),
1977 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1981 if (OuterInnerReductions.contains(&
PHI))
1985 if (OuterInnerReductions.contains(&
PHI))
1994 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
1999 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
2021bool LoopInterchangeTransform::adjustLoopLinks() {
2023 bool Changed = adjustLoopBranches();
2028 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
2053 LLVM_DEBUG(
dbgs() <<
"Not valid loop candidate for interchange\n");
2061 <<
"Computed dependence info, invoking the transform.";
2065 if (!LoopInterchange(&AR.
SE, &AR.
LI, &DI, &AR.
DT, &AR, &ORE).run(LN))
2067 U.markLoopNestChanged(
true);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the StringMap class.
ReachingDefAnalysis InstSet InstSet & Ignore
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
This file defines the interface for the loop cache analysis.
static cl::opt< unsigned int > MaxMemInstrCount("loop-interchange-max-meminstr-count", cl::init(64), cl::Hidden, cl::desc("Maximum number of load-store instructions that should be handled " "in the dependency matrix. Higher value may lead to more interchanges " "at the cost of compile-time"))
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static PHINode * findInnerReductionPhi(Loop *L, Value *V, SmallVectorImpl< Instruction * > &HasNoWrapInsts)
static cl::opt< unsigned int > MinLoopNestDepth("loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden, cl::desc("Minimum depth of loop nest considered for the transform"))
static bool isComputableLoopNest(ScalarEvolution *SE, ArrayRef< Loop * > LoopList)
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
static void printDepMatrix(CharMatrix &DepMatrix)
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
static cl::opt< unsigned int > MaxLoopNestDepth("loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden, cl::desc("Maximum depth of loop nest considered for the transform"))
static bool hasSupportedLoopDepth(ArrayRef< Loop * > LoopList, OptimizationRemarkEmitter &ORE)
static bool inThisOrder(const Instruction *Src, const Instruction *Dst)
Return true if Src appears before Dst in the same basic block.
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId)
Return true if we can vectorize the loop specified by LoopId.
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
static Value * followLCSSA(Value *SV)
static void populateWorklist(Loop &L, LoopVector &LoopList)
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
static std::optional< bool > isLexicographicallyPositive(ArrayRef< char > DV, unsigned Begin, unsigned End)
static cl::list< RuleTy > Profitabilities("loop-interchange-profitabilities", cl::ZeroOrMore, cl::MiscFlags::CommaSeparated, cl::Hidden, cl::desc("List of profitability heuristics to be used. They are applied in " "the given order"), cl::list_init< RuleTy >({RuleTy::PerLoopCacheAnalysis, RuleTy::PerInstrOrderCost, RuleTy::ForVectorization}), cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache", "Prioritize loop cache cost"), clEnumValN(RuleTy::PerInstrOrderCost, "instorder", "Prioritize the IVs order of each instruction"), clEnumValN(RuleTy::ForVectorization, "vectorize", "Prioritize vectorization"), clEnumValN(RuleTy::Ignore, "ignore", "Ignore profitability, force interchange (does not " "work with other options)")))
static bool noDuplicateRulesAndIgnore(ArrayRef< RuleTy > Rules)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
loop Loop Strength Reduction
uint64_t IntrinsicInst * II
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
DependenceInfo - This class is the main dependence-analysis driver.
LLVM_ABI std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions=false)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
bool isLoopInvariant(const Value *V, bool HasCoroSuspendInst=false) const
Return true if the specified value is loop invariant.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
unsigned getOpcode() const
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
const Loop * getLoop() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
StringRef - Represent a constant reference to a string, i.e.
constexpr size_t size() const
size - Get the string size.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
auto successors(const MachineBasicBlock *BB)
LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto map_range(ContainerTy &&C, FuncTy F)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Direction
An enum for the direction of the loop.