LLVM 22.0.0git
LoopInterchange.cpp
Go to the documentation of this file.
1//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This Pass handles loop interchange transform.
10// This pass interchanges loops to provide a more cache-friendly memory access
11// patterns.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SmallSet.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/ADT/StringMap.h"
21#include "llvm/ADT/StringRef.h"
30#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/Dominators.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/InstrTypes.h"
35#include "llvm/IR/Instruction.h"
37#include "llvm/IR/User.h"
38#include "llvm/IR/Value.h"
41#include "llvm/Support/Debug.h"
47#include <cassert>
48#include <utility>
49#include <vector>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "loop-interchange"
54
55STATISTIC(LoopsInterchanged, "Number of loops interchanged");
56
58 "loop-interchange-threshold", cl::init(0), cl::Hidden,
59 cl::desc("Interchange if you gain more than this number"));
60
61// Maximum number of load-stores that can be handled in the dependency matrix.
63 "loop-interchange-max-meminstr-count", cl::init(64), cl::Hidden,
65 "Maximum number of load-store instructions that should be handled "
66 "in the dependency matrix. Higher value may lead to more interchanges "
67 "at the cost of compile-time"));
68
69namespace {
70
71using LoopVector = SmallVector<Loop *, 8>;
72
73/// A list of direction vectors. Each entry represents a direction vector
74/// corresponding to one or more dependencies existing in the loop nest. The
75/// length of all direction vectors is equal and is N + 1, where N is the depth
76/// of the loop nest. The first N elements correspond to the dependency
77/// direction of each N loops. The last one indicates whether this entry is
78/// forward dependency ('<') or not ('*'). The term "forward" aligns with what
79/// is defined in LoopAccessAnalysis.
80// TODO: Check if we can use a sparse matrix here.
81using CharMatrix = std::vector<std::vector<char>>;
82
83/// Types of rules used in profitability check.
84enum class RuleTy {
85 PerLoopCacheAnalysis,
86 PerInstrOrderCost,
87 ForVectorization,
88 Ignore
89};
90
91} // end anonymous namespace
92
93// Minimum loop depth supported.
95 "loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden,
96 cl::desc("Minimum depth of loop nest considered for the transform"));
97
98// Maximum loop depth supported.
100 "loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden,
101 cl::desc("Maximum depth of loop nest considered for the transform"));
102
103// We prefer cache cost to vectorization by default.
105 "loop-interchange-profitabilities", cl::ZeroOrMore,
106 cl::MiscFlags::CommaSeparated, cl::Hidden,
107 cl::desc("List of profitability heuristics to be used. They are applied in "
108 "the given order"),
109 cl::list_init<RuleTy>({RuleTy::PerLoopCacheAnalysis,
110 RuleTy::PerInstrOrderCost,
111 RuleTy::ForVectorization}),
112 cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache",
113 "Prioritize loop cache cost"),
114 clEnumValN(RuleTy::PerInstrOrderCost, "instorder",
115 "Prioritize the IVs order of each instruction"),
116 clEnumValN(RuleTy::ForVectorization, "vectorize",
117 "Prioritize vectorization"),
118 clEnumValN(RuleTy::Ignore, "ignore",
119 "Ignore profitability, force interchange (does not "
120 "work with other options)")));
121
122#ifndef NDEBUG
125 for (RuleTy Rule : Rules) {
126 if (!Set.insert(Rule).second)
127 return false;
128 if (Rule == RuleTy::Ignore)
129 return false;
130 }
131 return true;
132}
133
134static void printDepMatrix(CharMatrix &DepMatrix) {
135 for (auto &Row : DepMatrix) {
136 // Drop the last element because it is a flag indicating whether this is
137 // forward dependency or not, which doesn't affect the legality check.
138 for (char D : drop_end(Row))
139 LLVM_DEBUG(dbgs() << D << " ");
140 LLVM_DEBUG(dbgs() << "\n");
141 }
142}
143
144/// Return true if \p Src appears before \p Dst in the same basic block.
145/// Precondition: \p Src and \Dst are distinct instructions within the same
146/// basic block.
147static bool inThisOrder(const Instruction *Src, const Instruction *Dst) {
148 assert(Src->getParent() == Dst->getParent() && Src != Dst &&
149 "Expected Src and Dst to be different instructions in the same BB");
150
151 bool FoundSrc = false;
152 for (const Instruction &I : *(Src->getParent())) {
153 if (&I == Src) {
154 FoundSrc = true;
155 continue;
156 }
157 if (&I == Dst)
158 return FoundSrc;
159 }
160
161 llvm_unreachable("Dst not found");
162}
163#endif
164
165static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
166 Loop *L, DependenceInfo *DI,
167 ScalarEvolution *SE,
169 using ValueVector = SmallVector<Value *, 16>;
170
171 ValueVector MemInstr;
172
173 // For each block.
174 for (BasicBlock *BB : L->blocks()) {
175 // Scan the BB and collect legal loads and stores.
176 for (Instruction &I : *BB) {
177 if (!isa<Instruction>(I))
178 return false;
179 if (auto *Ld = dyn_cast<LoadInst>(&I)) {
180 if (!Ld->isSimple())
181 return false;
182 MemInstr.push_back(&I);
183 } else if (auto *St = dyn_cast<StoreInst>(&I)) {
184 if (!St->isSimple())
185 return false;
186 MemInstr.push_back(&I);
187 }
188 }
189 }
190
191 LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
192 << " Loads and Stores to analyze\n");
193 if (MemInstr.size() > MaxMemInstrCount) {
194 LLVM_DEBUG(dbgs() << "The transform doesn't support more than "
195 << MaxMemInstrCount << " load/stores in a loop\n");
196 ORE->emit([&]() {
197 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedLoop",
198 L->getStartLoc(), L->getHeader())
199 << "Number of loads/stores exceeded, the supported maximum "
200 "can be increased with option "
201 "-loop-interchange-maxmeminstr-count.";
202 });
203 return false;
204 }
205 ValueVector::iterator I, IE, J, JE;
206
207 // Manage direction vectors that are already seen. Map each direction vector
208 // to an index of DepMatrix at which it is stored.
210
211 for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
212 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
213 std::vector<char> Dep;
214 Instruction *Src = cast<Instruction>(*I);
215 Instruction *Dst = cast<Instruction>(*J);
216 // Ignore Input dependencies.
217 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
218 continue;
219 // Track Output, Flow, and Anti dependencies.
220 if (auto D = DI->depends(Src, Dst)) {
221 assert(D->isOrdered() && "Expected an output, flow or anti dep.");
222 // If the direction vector is negative, normalize it to
223 // make it non-negative.
224 if (D->normalize(SE))
225 LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
226 LLVM_DEBUG(StringRef DepType =
227 D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
228 dbgs() << "Found " << DepType
229 << " dependency between Src and Dst\n"
230 << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
231 unsigned Levels = D->getLevels();
232 char Direction;
233 for (unsigned II = 1; II <= Levels; ++II) {
234 // `DVEntry::LE` is converted to `*`. This is because `LE` means `<`
235 // or `=`, for which we don't have an equivalent representation, so
236 // that the conservative approximation is necessary. The same goes for
237 // `DVEntry::GE`.
238 // TODO: Use of fine-grained expressions allows for more accurate
239 // analysis.
240 unsigned Dir = D->getDirection(II);
241 if (Dir == Dependence::DVEntry::LT)
242 Direction = '<';
243 else if (Dir == Dependence::DVEntry::GT)
244 Direction = '>';
245 else if (Dir == Dependence::DVEntry::EQ)
246 Direction = '=';
247 else
248 Direction = '*';
249 Dep.push_back(Direction);
250 }
251
252 // If the Dependence object doesn't have any information, fill the
253 // dependency vector with '*'.
254 if (D->isConfused()) {
255 assert(Dep.empty() && "Expected empty dependency vector");
256 Dep.assign(Level, '*');
257 }
258
259 while (Dep.size() != Level) {
260 Dep.push_back('I');
261 }
262
263 // Test whether the dependency is forward or not.
264 bool IsKnownForward = true;
265 if (Src->getParent() != Dst->getParent()) {
266 // In general, when Src and Dst are in different BBs, the execution
267 // order of them within a single iteration is not guaranteed. Treat
268 // conservatively as not-forward dependency in this case.
269 IsKnownForward = false;
270 } else {
271 // Src and Dst are in the same BB. If they are the different
272 // instructions, Src should appear before Dst in the BB as they are
273 // stored to MemInstr in that order.
274 assert((Src == Dst || inThisOrder(Src, Dst)) &&
275 "Unexpected instructions");
276
277 // If the Dependence object is reversed (due to normalization), it
278 // represents the dependency from Dst to Src, meaning it is a backward
279 // dependency. Otherwise it should be a forward dependency.
280 bool IsReversed = D->getSrc() != Src;
281 if (IsReversed)
282 IsKnownForward = false;
283 }
284
285 // Initialize the last element. Assume forward dependencies only; it
286 // will be updated later if there is any non-forward dependency.
287 Dep.push_back('<');
288
289 // The last element should express the "summary" among one or more
290 // direction vectors whose first N elements are the same (where N is
291 // the depth of the loop nest). Hence we exclude the last element from
292 // the Seen map.
293 auto [Ite, Inserted] = Seen.try_emplace(
294 StringRef(Dep.data(), Dep.size() - 1), DepMatrix.size());
295
296 // Make sure we only add unique entries to the dependency matrix.
297 if (Inserted)
298 DepMatrix.push_back(Dep);
299
300 // If we cannot prove that this dependency is forward, change the last
301 // element of the corresponding entry. Since a `[... *]` dependency
302 // includes a `[... <]` dependency, we do not need to keep both and
303 // change the existing entry instead.
304 if (!IsKnownForward)
305 DepMatrix[Ite->second].back() = '*';
306 }
307 }
308 }
309
310 return true;
311}
312
313// A loop is moved from index 'from' to an index 'to'. Update the Dependence
314// matrix by exchanging the two columns.
315static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
316 unsigned ToIndx) {
317 for (auto &Row : DepMatrix)
318 std::swap(Row[ToIndx], Row[FromIndx]);
319}
320
321// Check if a direction vector is lexicographically positive. Return true if it
322// is positive, nullopt if it is "zero", otherwise false.
323// [Theorem] A permutation of the loops in a perfect nest is legal if and only
324// if the direction matrix, after the same permutation is applied to its
325// columns, has no ">" direction as the leftmost non-"=" direction in any row.
326static std::optional<bool>
327isLexicographicallyPositive(ArrayRef<char> DV, unsigned Begin, unsigned End) {
328 for (unsigned char Direction : DV.slice(Begin, End - Begin)) {
329 if (Direction == '<')
330 return true;
331 if (Direction == '>' || Direction == '*')
332 return false;
333 }
334 return std::nullopt;
335}
336
337// Checks if it is legal to interchange 2 loops.
338static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
339 unsigned InnerLoopId,
340 unsigned OuterLoopId) {
341 unsigned NumRows = DepMatrix.size();
342 std::vector<char> Cur;
343 // For each row check if it is valid to interchange.
344 for (unsigned Row = 0; Row < NumRows; ++Row) {
345 // Create temporary DepVector check its lexicographical order
346 // before and after swapping OuterLoop vs InnerLoop
347 Cur = DepMatrix[Row];
348
349 // If the surrounding loops already ensure that the direction vector is
350 // lexicographically positive, nothing within the loop will be able to break
351 // the dependence. In such a case we can skip the subsequent check.
352 if (isLexicographicallyPositive(Cur, 0, OuterLoopId) == true)
353 continue;
354
355 // Check if the direction vector is lexicographically positive (or zero)
356 // for both before/after exchanged. Ignore the last element because it
357 // doesn't affect the legality.
358 if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size() - 1) == false)
359 return false;
360 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
361 if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size() - 1) == false)
362 return false;
363 }
364 return true;
365}
366
367static void populateWorklist(Loop &L, LoopVector &LoopList) {
368 LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
369 << L.getHeader()->getParent()->getName() << " Loop: %"
370 << L.getHeader()->getName() << '\n');
371 assert(LoopList.empty() && "LoopList should initially be empty!");
372 Loop *CurrentLoop = &L;
373 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
374 while (!Vec->empty()) {
375 // The current loop has multiple subloops in it hence it is not tightly
376 // nested.
377 // Discard all loops above it added into Worklist.
378 if (Vec->size() != 1) {
379 LoopList = {};
380 return;
381 }
382
383 LoopList.push_back(CurrentLoop);
384 CurrentLoop = Vec->front();
385 Vec = &CurrentLoop->getSubLoops();
386 }
387 LoopList.push_back(CurrentLoop);
388}
389
392 unsigned LoopNestDepth = LoopList.size();
393 if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) {
394 LLVM_DEBUG(dbgs() << "Unsupported depth of loop nest " << LoopNestDepth
395 << ", the supported range is [" << MinLoopNestDepth
396 << ", " << MaxLoopNestDepth << "].\n");
397 Loop *OuterLoop = LoopList.front();
398 ORE.emit([&]() {
399 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedLoopNestDepth",
400 OuterLoop->getStartLoc(),
401 OuterLoop->getHeader())
402 << "Unsupported depth of loop nest, the supported range is ["
403 << std::to_string(MinLoopNestDepth) << ", "
404 << std::to_string(MaxLoopNestDepth) << "].\n";
405 });
406 return false;
407 }
408 return true;
409}
410
412 ArrayRef<Loop *> LoopList) {
413 for (Loop *L : LoopList) {
414 const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
415 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
416 LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
417 return false;
418 }
419 if (L->getNumBackEdges() != 1) {
420 LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
421 return false;
422 }
423 if (!L->getExitingBlock()) {
424 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
425 return false;
426 }
427 }
428 return true;
429}
430
431namespace {
432
433/// LoopInterchangeLegality checks if it is legal to interchange the loop.
434class LoopInterchangeLegality {
435public:
436 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
438 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
439
440 /// Check if the loops can be interchanged.
441 bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
442 CharMatrix &DepMatrix);
443
444 /// Discover induction PHIs in the header of \p L. Induction
445 /// PHIs are added to \p Inductions.
446 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
447
448 /// Check if the loop structure is understood. We do not handle triangular
449 /// loops for now.
450 bool isLoopStructureUnderstood();
451
452 bool currentLimitations();
453
454 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
455 return OuterInnerReductions;
456 }
457
458 const ArrayRef<PHINode *> getInnerLoopInductions() const {
459 return InnerLoopInductions;
460 }
461
462 ArrayRef<Instruction *> getHasNoWrapReductions() const {
463 return HasNoWrapReductions;
464 }
465
466private:
467 bool tightlyNested(Loop *Outer, Loop *Inner);
468 bool containsUnsafeInstructions(BasicBlock *BB);
469
470 /// Discover induction and reduction PHIs in the header of \p L. Induction
471 /// PHIs are added to \p Inductions, reductions are added to
472 /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
473 /// to be passed as \p InnerLoop.
474 bool findInductionAndReductions(Loop *L,
475 SmallVector<PHINode *, 8> &Inductions,
476 Loop *InnerLoop);
477
478 Loop *OuterLoop;
479 Loop *InnerLoop;
480
481 ScalarEvolution *SE;
482
483 /// Interface to emit optimization remarks.
485
486 /// Set of reduction PHIs taking part of a reduction across the inner and
487 /// outer loop.
488 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
489
490 /// Set of inner loop induction PHIs
491 SmallVector<PHINode *, 8> InnerLoopInductions;
492
493 /// Hold instructions that have nuw/nsw flags and involved in reductions,
494 /// like integer addition/multiplication. Those flags must be dropped when
495 /// interchanging the loops.
496 SmallVector<Instruction *, 4> HasNoWrapReductions;
497};
498
499/// Manages information utilized by the profitability check for cache. The main
500/// purpose of this class is to delay the computation of CacheCost until it is
501/// actually needed.
502class CacheCostManager {
503 Loop *OutermostLoop;
505 DependenceInfo *DI;
506
507 /// CacheCost for \ref OutermostLoop. Once it is computed, it is cached. Note
508 /// that the result can be nullptr.
509 std::optional<std::unique_ptr<CacheCost>> CC;
510
511 /// Maps each loop to an index representing the optimal position within the
512 /// loop-nest, as determined by the cache cost analysis.
514
515 void computeIfUnitinialized();
516
517public:
518 CacheCostManager(Loop *OutermostLoop, LoopStandardAnalysisResults *AR,
519 DependenceInfo *DI)
520 : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
521 CacheCost *getCacheCost();
522 const DenseMap<const Loop *, unsigned> &getCostMap();
523};
524
525/// LoopInterchangeProfitability checks if it is profitable to interchange the
526/// loop.
527class LoopInterchangeProfitability {
528public:
529 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
531 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
532
533 /// Check if the loop interchange is profitable.
534 bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
535 unsigned InnerLoopId, unsigned OuterLoopId,
536 CharMatrix &DepMatrix, CacheCostManager &CCM);
537
538private:
539 int getInstrOrderCost();
540 std::optional<bool> isProfitablePerLoopCacheAnalysis(
541 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC);
542 std::optional<bool> isProfitablePerInstrOrderCost();
543 std::optional<bool> isProfitableForVectorization(unsigned InnerLoopId,
544 unsigned OuterLoopId,
545 CharMatrix &DepMatrix);
546 Loop *OuterLoop;
547 Loop *InnerLoop;
548
549 /// Scev analysis.
550 ScalarEvolution *SE;
551
552 /// Interface to emit optimization remarks.
554};
555
556/// LoopInterchangeTransform interchanges the loop.
557class LoopInterchangeTransform {
558public:
559 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
560 LoopInfo *LI, DominatorTree *DT,
561 const LoopInterchangeLegality &LIL)
562 : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
563
564 /// Interchange OuterLoop and InnerLoop.
565 bool transform(ArrayRef<Instruction *> DropNoWrapInsts);
566 void restructureLoops(Loop *NewInner, Loop *NewOuter,
567 BasicBlock *OrigInnerPreHeader,
568 BasicBlock *OrigOuterPreHeader);
569 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
570
571private:
572 bool adjustLoopLinks();
573 bool adjustLoopBranches();
574
575 Loop *OuterLoop;
576 Loop *InnerLoop;
577
578 /// Scev analysis.
579 ScalarEvolution *SE;
580
581 LoopInfo *LI;
582 DominatorTree *DT;
583
584 const LoopInterchangeLegality &LIL;
585};
586
587struct LoopInterchange {
588 ScalarEvolution *SE = nullptr;
589 LoopInfo *LI = nullptr;
590 DependenceInfo *DI = nullptr;
591 DominatorTree *DT = nullptr;
592 LoopStandardAnalysisResults *AR = nullptr;
593
594 /// Interface to emit optimization remarks.
596
597 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
600 : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE) {}
601
602 bool run(Loop *L) {
603 if (L->getParentLoop())
604 return false;
605 SmallVector<Loop *, 8> LoopList;
606 populateWorklist(*L, LoopList);
607 return processLoopList(LoopList);
608 }
609
610 bool run(LoopNest &LN) {
611 SmallVector<Loop *, 8> LoopList(LN.getLoops());
612 for (unsigned I = 1; I < LoopList.size(); ++I)
613 if (LoopList[I]->getParentLoop() != LoopList[I - 1])
614 return false;
615 return processLoopList(LoopList);
616 }
617
618 unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
619 // TODO: Add a better heuristic to select the loop to be interchanged based
620 // on the dependence matrix. Currently we select the innermost loop.
621 return LoopList.size() - 1;
622 }
623
624 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
625 bool Changed = false;
626
627 // Ensure proper loop nest depth.
628 assert(hasSupportedLoopDepth(LoopList, *ORE) &&
629 "Unsupported depth of loop nest.");
630
631 unsigned LoopNestDepth = LoopList.size();
632
633 LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
634 << "\n");
635
636 CharMatrix DependencyMatrix;
637 Loop *OuterMostLoop = *(LoopList.begin());
638 if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
639 OuterMostLoop, DI, SE, ORE)) {
640 LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
641 return false;
642 }
643
644 LLVM_DEBUG(dbgs() << "Dependency matrix before interchange:\n";
645 printDepMatrix(DependencyMatrix));
646
647 // Get the Outermost loop exit.
648 BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
649 if (!LoopNestExit) {
650 LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
651 return false;
652 }
653
654 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
655 CacheCostManager CCM(LoopList[0], AR, DI);
656 // We try to achieve the globally optimal memory access for the loopnest,
657 // and do interchange based on a bubble-sort fasion. We start from
658 // the innermost loop, move it outwards to the best possible position
659 // and repeat this process.
660 for (unsigned j = SelecLoopId; j > 0; j--) {
661 bool ChangedPerIter = false;
662 for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
663 bool Interchanged =
664 processLoop(LoopList, i, i - 1, DependencyMatrix, CCM);
665 ChangedPerIter |= Interchanged;
666 Changed |= Interchanged;
667 }
668 // Early abort if there was no interchange during an entire round of
669 // moving loops outwards.
670 if (!ChangedPerIter)
671 break;
672 }
673 return Changed;
674 }
675
676 bool processLoop(SmallVectorImpl<Loop *> &LoopList, unsigned InnerLoopId,
677 unsigned OuterLoopId,
678 std::vector<std::vector<char>> &DependencyMatrix,
679 CacheCostManager &CCM) {
680 Loop *OuterLoop = LoopList[OuterLoopId];
681 Loop *InnerLoop = LoopList[InnerLoopId];
682 LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
683 << " and OuterLoopId = " << OuterLoopId << "\n");
684 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
685 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
686 LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
687 return false;
688 }
689 LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
690 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
691 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
692 DependencyMatrix, CCM)) {
693 LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
694 return false;
695 }
696
697 ORE->emit([&]() {
698 return OptimizationRemark(DEBUG_TYPE, "Interchanged",
699 InnerLoop->getStartLoc(),
700 InnerLoop->getHeader())
701 << "Loop interchanged with enclosing loop.";
702 });
703
704 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
705 LIT.transform(LIL.getHasNoWrapReductions());
706 LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
707 LoopsInterchanged++;
708
709 llvm::formLCSSARecursively(*OuterLoop, *DT, LI, SE);
710
711 // Loops interchanged, update LoopList accordingly.
712 std::swap(LoopList[OuterLoopId], LoopList[InnerLoopId]);
713 // Update the DependencyMatrix
714 interChangeDependencies(DependencyMatrix, InnerLoopId, OuterLoopId);
715
716 LLVM_DEBUG(dbgs() << "Dependency matrix after interchange:\n";
717 printDepMatrix(DependencyMatrix));
718
719 return true;
720 }
721};
722
723} // end anonymous namespace
724
725bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
726 return any_of(*BB, [](const Instruction &I) {
727 return I.mayHaveSideEffects() || I.mayReadFromMemory();
728 });
729}
730
731bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
732 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
733 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
734 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
735
736 LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
737
738 // A perfectly nested loop will not have any branch in between the outer and
739 // inner block i.e. outer header will branch to either inner preheader and
740 // outerloop latch.
741 BranchInst *OuterLoopHeaderBI =
742 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
743 if (!OuterLoopHeaderBI)
744 return false;
745
746 for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
747 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
748 Succ != OuterLoopLatch)
749 return false;
750
751 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
752 // We do not have any basic block in between now make sure the outer header
753 // and outer loop latch doesn't contain any unsafe instructions.
754 if (containsUnsafeInstructions(OuterLoopHeader) ||
755 containsUnsafeInstructions(OuterLoopLatch))
756 return false;
757
758 // Also make sure the inner loop preheader does not contain any unsafe
759 // instructions. Note that all instructions in the preheader will be moved to
760 // the outer loop header when interchanging.
761 if (InnerLoopPreHeader != OuterLoopHeader &&
762 containsUnsafeInstructions(InnerLoopPreHeader))
763 return false;
764
765 BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
766 // Ensure the inner loop exit block flows to the outer loop latch possibly
767 // through empty blocks.
768 const BasicBlock &SuccInner =
769 LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
770 if (&SuccInner != OuterLoopLatch) {
771 LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit
772 << " does not lead to the outer loop latch.\n";);
773 return false;
774 }
775 // The inner loop exit block does flow to the outer loop latch and not some
776 // other BBs, now make sure it contains safe instructions, since it will be
777 // moved into the (new) inner loop after interchange.
778 if (containsUnsafeInstructions(InnerLoopExit))
779 return false;
780
781 LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
782 // We have a perfect loop nest.
783 return true;
784}
785
786bool LoopInterchangeLegality::isLoopStructureUnderstood() {
787 BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
788 for (PHINode *InnerInduction : InnerLoopInductions) {
789 unsigned Num = InnerInduction->getNumOperands();
790 for (unsigned i = 0; i < Num; ++i) {
791 Value *Val = InnerInduction->getOperand(i);
792 if (isa<Constant>(Val))
793 continue;
794 Instruction *I = dyn_cast<Instruction>(Val);
795 if (!I)
796 return false;
797 // TODO: Handle triangular loops.
798 // e.g. for(int i=0;i<N;i++)
799 // for(int j=i;j<N;j++)
800 unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
801 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
802 InnerLoopPreheader &&
803 !OuterLoop->isLoopInvariant(I)) {
804 return false;
805 }
806 }
807 }
808
809 // TODO: Handle triangular loops of another form.
810 // e.g. for(int i=0;i<N;i++)
811 // for(int j=0;j<i;j++)
812 // or,
813 // for(int i=0;i<N;i++)
814 // for(int j=0;j*i<N;j++)
815 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
816 BranchInst *InnerLoopLatchBI =
817 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
818 if (!InnerLoopLatchBI->isConditional())
819 return false;
820 if (CmpInst *InnerLoopCmp =
821 dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
822 Value *Op0 = InnerLoopCmp->getOperand(0);
823 Value *Op1 = InnerLoopCmp->getOperand(1);
824
825 // LHS and RHS of the inner loop exit condition, e.g.,
826 // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
827 Value *Left = nullptr;
828 Value *Right = nullptr;
829
830 // Check if V only involves inner loop induction variable.
831 // Return true if V is InnerInduction, or a cast from
832 // InnerInduction, or a binary operator that involves
833 // InnerInduction and a constant.
834 std::function<bool(Value *)> IsPathToInnerIndVar;
835 IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
836 if (llvm::is_contained(InnerLoopInductions, V))
837 return true;
838 if (isa<Constant>(V))
839 return true;
840 const Instruction *I = dyn_cast<Instruction>(V);
841 if (!I)
842 return false;
843 if (isa<CastInst>(I))
844 return IsPathToInnerIndVar(I->getOperand(0));
845 if (isa<BinaryOperator>(I))
846 return IsPathToInnerIndVar(I->getOperand(0)) &&
847 IsPathToInnerIndVar(I->getOperand(1));
848 return false;
849 };
850
851 // In case of multiple inner loop indvars, it is okay if LHS and RHS
852 // are both inner indvar related variables.
853 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
854 return true;
855
856 // Otherwise we check if the cmp instruction compares an inner indvar
857 // related variable (Left) with a outer loop invariant (Right).
858 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
859 Left = Op0;
860 Right = Op1;
861 } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
862 Left = Op1;
863 Right = Op0;
864 }
865
866 if (Left == nullptr)
867 return false;
868
869 const SCEV *S = SE->getSCEV(Right);
870 if (!SE->isLoopInvariant(S, OuterLoop))
871 return false;
872 }
873
874 return true;
875}
876
877// If SV is a LCSSA PHI node with a single incoming value, return the incoming
878// value.
879static Value *followLCSSA(Value *SV) {
880 PHINode *PHI = dyn_cast<PHINode>(SV);
881 if (!PHI)
882 return SV;
883
884 if (PHI->getNumIncomingValues() != 1)
885 return SV;
886 return followLCSSA(PHI->getIncomingValue(0));
887}
888
889// Check V's users to see if it is involved in a reduction in L.
890static PHINode *
892 SmallVectorImpl<Instruction *> &HasNoWrapInsts) {
893 // Reduction variables cannot be constants.
894 if (isa<Constant>(V))
895 return nullptr;
896
897 for (Value *User : V->users()) {
898 if (PHINode *PHI = dyn_cast<PHINode>(User)) {
899 if (PHI->getNumIncomingValues() == 1)
900 continue;
903 // Detect floating point reduction only when it can be reordered.
904 if (RD.getExactFPMathInst() != nullptr)
905 return nullptr;
906
908 switch (RK) {
909 case RecurKind::Or:
910 case RecurKind::And:
911 case RecurKind::Xor:
912 case RecurKind::SMin:
913 case RecurKind::SMax:
914 case RecurKind::UMin:
915 case RecurKind::UMax:
916 case RecurKind::FAdd:
917 case RecurKind::FMul:
918 case RecurKind::FMin:
919 case RecurKind::FMax:
920 case RecurKind::FMinimum:
921 case RecurKind::FMaximum:
922 case RecurKind::FMinimumNum:
923 case RecurKind::FMaximumNum:
924 case RecurKind::FMulAdd:
925 case RecurKind::AnyOf:
926 return PHI;
927
928 // Change the order of integer addition/multiplication may change the
929 // semantics. Consider the following case:
930 //
931 // int A[2][2] = {{ INT_MAX, INT_MAX }, { INT_MIN, INT_MIN }};
932 // int sum = 0;
933 // for (int i = 0; i < 2; i++)
934 // for (int j = 0; j < 2; j++)
935 // sum += A[j][i];
936 //
937 // If the above loops are exchanged, the addition will cause an
938 // overflow. To prevent this, we must drop the nuw/nsw flags from the
939 // addition/multiplication instructions when we actually exchanges the
940 // loops.
941 case RecurKind::Add:
942 case RecurKind::Mul: {
943 unsigned OpCode = RecurrenceDescriptor::getOpcode(RK);
945
946 // Bail out when we fail to collect reduction instructions chain.
947 if (Ops.empty())
948 return nullptr;
949
950 for (Instruction *I : Ops) {
951 assert(I->getOpcode() == OpCode &&
952 "Expected the instruction to be the reduction operation");
953 (void)OpCode;
954
955 // If the instruction has nuw/nsw flags, we must drop them when the
956 // transformation is actually performed.
957 if (I->hasNoSignedWrap() || I->hasNoUnsignedWrap())
958 HasNoWrapInsts.push_back(I);
959 }
960 return PHI;
961 }
962
963 default:
964 return nullptr;
965 }
966 }
967 return nullptr;
968 }
969 }
970
971 return nullptr;
972}
973
974bool LoopInterchangeLegality::findInductionAndReductions(
975 Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
976 if (!L->getLoopLatch() || !L->getLoopPredecessor())
977 return false;
978 for (PHINode &PHI : L->getHeader()->phis()) {
981 Inductions.push_back(&PHI);
982 else {
983 // PHIs in inner loops need to be part of a reduction in the outer loop,
984 // discovered when checking the PHIs of the outer loop earlier.
985 if (!InnerLoop) {
986 if (!OuterInnerReductions.count(&PHI)) {
987 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
988 "across the outer loop.\n");
989 return false;
990 }
991 } else {
992 assert(PHI.getNumIncomingValues() == 2 &&
993 "Phis in loop header should have exactly 2 incoming values");
994 // Check if we have a PHI node in the outer loop that has a reduction
995 // result from the inner loop as an incoming value.
996 Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
997 PHINode *InnerRedPhi =
998 findInnerReductionPhi(InnerLoop, V, HasNoWrapReductions);
999 if (!InnerRedPhi ||
1000 !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
1001 LLVM_DEBUG(
1002 dbgs()
1003 << "Failed to recognize PHI as an induction or reduction.\n");
1004 return false;
1005 }
1006 OuterInnerReductions.insert(&PHI);
1007 OuterInnerReductions.insert(InnerRedPhi);
1008 }
1009 }
1010 }
1011 return true;
1012}
1013
1014// This function indicates the current limitations in the transform as a result
1015// of which we do not proceed.
1016bool LoopInterchangeLegality::currentLimitations() {
1017 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1018
1019 // transform currently expects the loop latches to also be the exiting
1020 // blocks.
1021 if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
1022 OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
1023 !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
1024 !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
1025 LLVM_DEBUG(
1026 dbgs() << "Loops where the latch is not the exiting block are not"
1027 << " supported currently.\n");
1028 ORE->emit([&]() {
1029 return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
1030 OuterLoop->getStartLoc(),
1031 OuterLoop->getHeader())
1032 << "Loops where the latch is not the exiting block cannot be"
1033 " interchange currently.";
1034 });
1035 return true;
1036 }
1037
1038 SmallVector<PHINode *, 8> Inductions;
1039 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
1040 LLVM_DEBUG(
1041 dbgs() << "Only outer loops with induction or reduction PHI nodes "
1042 << "are supported currently.\n");
1043 ORE->emit([&]() {
1044 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
1045 OuterLoop->getStartLoc(),
1046 OuterLoop->getHeader())
1047 << "Only outer loops with induction or reduction PHI nodes can be"
1048 " interchanged currently.";
1049 });
1050 return true;
1051 }
1052
1053 Inductions.clear();
1054 // For multi-level loop nests, make sure that all phi nodes for inner loops
1055 // at all levels can be recognized as a induction or reduction phi. Bail out
1056 // if a phi node at a certain nesting level cannot be properly recognized.
1057 Loop *CurLevelLoop = OuterLoop;
1058 while (!CurLevelLoop->getSubLoops().empty()) {
1059 // We already made sure that the loop nest is tightly nested.
1060 CurLevelLoop = CurLevelLoop->getSubLoops().front();
1061 if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) {
1062 LLVM_DEBUG(
1063 dbgs() << "Only inner loops with induction or reduction PHI nodes "
1064 << "are supported currently.\n");
1065 ORE->emit([&]() {
1066 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
1067 CurLevelLoop->getStartLoc(),
1068 CurLevelLoop->getHeader())
1069 << "Only inner loops with induction or reduction PHI nodes can be"
1070 " interchange currently.";
1071 });
1072 return true;
1073 }
1074 }
1075
1076 // TODO: Triangular loops are not handled for now.
1077 if (!isLoopStructureUnderstood()) {
1078 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
1079 ORE->emit([&]() {
1080 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
1081 InnerLoop->getStartLoc(),
1082 InnerLoop->getHeader())
1083 << "Inner loop structure not understood currently.";
1084 });
1085 return true;
1086 }
1087
1088 return false;
1089}
1090
1091bool LoopInterchangeLegality::findInductions(
1092 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
1093 for (PHINode &PHI : L->getHeader()->phis()) {
1096 Inductions.push_back(&PHI);
1097 }
1098 return !Inductions.empty();
1099}
1100
1101// We currently only support LCSSA PHI nodes in the inner loop exit, if their
1102// users are either reduction PHIs or PHIs outside the outer loop (which means
1103// the we are only interested in the final value after the loop).
1104static bool
1107 BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
1108 for (PHINode &PHI : InnerExit->phis()) {
1109 // Reduction lcssa phi will have only 1 incoming block that from loop latch.
1110 if (PHI.getNumIncomingValues() > 1)
1111 return false;
1112 if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
1113 PHINode *PN = dyn_cast<PHINode>(U);
1114 return !PN ||
1115 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
1116 })) {
1117 return false;
1118 }
1119 }
1120 return true;
1121}
1122
1123// We currently support LCSSA PHI nodes in the outer loop exit, if their
1124// incoming values do not come from the outer loop latch or if the
1125// outer loop latch has a single predecessor. In that case, the value will
1126// be available if both the inner and outer loop conditions are true, which
1127// will still be true after interchanging. If we have multiple predecessor,
1128// that may not be the case, e.g. because the outer loop latch may be executed
1129// if the inner loop is not executed.
1130static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
1131 BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
1132 for (PHINode &PHI : LoopNestExit->phis()) {
1133 for (Value *Incoming : PHI.incoming_values()) {
1134 Instruction *IncomingI = dyn_cast<Instruction>(Incoming);
1135 if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
1136 continue;
1137
1138 // The incoming value is defined in the outer loop latch. Currently we
1139 // only support that in case the outer loop latch has a single predecessor.
1140 // This guarantees that the outer loop latch is executed if and only if
1141 // the inner loop is executed (because tightlyNested() guarantees that the
1142 // outer loop header only branches to the inner loop or the outer loop
1143 // latch).
1144 // FIXME: We could weaken this logic and allow multiple predecessors,
1145 // if the values are produced outside the loop latch. We would need
1146 // additional logic to update the PHI nodes in the exit block as
1147 // well.
1148 if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
1149 return false;
1150 }
1151 }
1152 return true;
1153}
1154
1155// In case of multi-level nested loops, it may occur that lcssa phis exist in
1156// the latch of InnerLoop, i.e., when defs of the incoming values are further
1157// inside the loopnest. Sometimes those incoming values are not available
1158// after interchange, since the original inner latch will become the new outer
1159// latch which may have predecessor paths that do not include those incoming
1160// values.
1161// TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
1162// multi-level loop nests.
1163static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
1164 if (InnerLoop->getSubLoops().empty())
1165 return true;
1166 // If the original outer latch has only one predecessor, then values defined
1167 // further inside the looploop, e.g., in the innermost loop, will be available
1168 // at the new outer latch after interchange.
1169 if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
1170 return true;
1171
1172 // The outer latch has more than one predecessors, i.e., the inner
1173 // exit and the inner header.
1174 // PHI nodes in the inner latch are lcssa phis where the incoming values
1175 // are defined further inside the loopnest. Check if those phis are used
1176 // in the original inner latch. If that is the case then bail out since
1177 // those incoming values may not be available at the new outer latch.
1178 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1179 for (PHINode &PHI : InnerLoopLatch->phis()) {
1180 for (auto *U : PHI.users()) {
1181 Instruction *UI = cast<Instruction>(U);
1182 if (InnerLoopLatch == UI->getParent())
1183 return false;
1184 }
1185 }
1186 return true;
1187}
1188
1189bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
1190 unsigned OuterLoopId,
1191 CharMatrix &DepMatrix) {
1192 if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
1193 LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
1194 << " and OuterLoopId = " << OuterLoopId
1195 << " due to dependence\n");
1196 ORE->emit([&]() {
1197 return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
1198 InnerLoop->getStartLoc(),
1199 InnerLoop->getHeader())
1200 << "Cannot interchange loops due to dependences.";
1201 });
1202 return false;
1203 }
1204 // Check if outer and inner loop contain legal instructions only.
1205 for (auto *BB : OuterLoop->blocks())
1207 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1208 // readnone functions do not prevent interchanging.
1209 if (CI->onlyWritesMemory())
1210 continue;
1211 LLVM_DEBUG(
1212 dbgs() << "Loops with call instructions cannot be interchanged "
1213 << "safely.");
1214 ORE->emit([&]() {
1215 return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
1216 CI->getDebugLoc(),
1217 CI->getParent())
1218 << "Cannot interchange loops due to call instruction.";
1219 });
1220
1221 return false;
1222 }
1223
1224 if (!findInductions(InnerLoop, InnerLoopInductions)) {
1225 LLVM_DEBUG(dbgs() << "Could not find inner loop induction variables.\n");
1226 return false;
1227 }
1228
1229 if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
1230 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");
1231 ORE->emit([&]() {
1232 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI",
1233 InnerLoop->getStartLoc(),
1234 InnerLoop->getHeader())
1235 << "Cannot interchange loops because unsupported PHI nodes found "
1236 "in inner loop latch.";
1237 });
1238 return false;
1239 }
1240
1241 // TODO: The loops could not be interchanged due to current limitations in the
1242 // transform module.
1243 if (currentLimitations()) {
1244 LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
1245 return false;
1246 }
1247
1248 // Check if the loops are tightly nested.
1249 if (!tightlyNested(OuterLoop, InnerLoop)) {
1250 LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
1251 ORE->emit([&]() {
1252 return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1253 InnerLoop->getStartLoc(),
1254 InnerLoop->getHeader())
1255 << "Cannot interchange loops because they are not tightly "
1256 "nested.";
1257 });
1258 return false;
1259 }
1260
1261 if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1262 OuterInnerReductions)) {
1263 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1264 ORE->emit([&]() {
1265 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1266 InnerLoop->getStartLoc(),
1267 InnerLoop->getHeader())
1268 << "Found unsupported PHI node in loop exit.";
1269 });
1270 return false;
1271 }
1272
1273 if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1274 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1275 ORE->emit([&]() {
1276 return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1277 OuterLoop->getStartLoc(),
1278 OuterLoop->getHeader())
1279 << "Found unsupported PHI node in loop exit.";
1280 });
1281 return false;
1282 }
1283
1284 return true;
1285}
1286
1287void CacheCostManager::computeIfUnitinialized() {
1288 if (CC.has_value())
1289 return;
1290
1291 LLVM_DEBUG(dbgs() << "Compute CacheCost.\n");
1292 CC = CacheCost::getCacheCost(*OutermostLoop, *AR, *DI);
1293 // Obtain the loop vector returned from loop cache analysis beforehand,
1294 // and put each <Loop, index> pair into a map for constant time query
1295 // later. Indices in loop vector reprsent the optimal order of the
1296 // corresponding loop, e.g., given a loopnest with depth N, index 0
1297 // indicates the loop should be placed as the outermost loop and index N
1298 // indicates the loop should be placed as the innermost loop.
1299 //
1300 // For the old pass manager CacheCost would be null.
1301 if (*CC != nullptr)
1302 for (const auto &[Idx, Cost] : enumerate((*CC)->getLoopCosts()))
1303 CostMap[Cost.first] = Idx;
1304}
1305
1306CacheCost *CacheCostManager::getCacheCost() {
1307 computeIfUnitinialized();
1308 return CC->get();
1309}
1310
1311const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
1312 computeIfUnitinialized();
1313 return CostMap;
1314}
1315
1316int LoopInterchangeProfitability::getInstrOrderCost() {
1317 unsigned GoodOrder, BadOrder;
1318 BadOrder = GoodOrder = 0;
1319 for (BasicBlock *BB : InnerLoop->blocks()) {
1320 for (Instruction &Ins : *BB) {
1321 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1322 bool FoundInnerInduction = false;
1323 bool FoundOuterInduction = false;
1324 for (Value *Op : GEP->operands()) {
1325 // Skip operands that are not SCEV-able.
1326 if (!SE->isSCEVable(Op->getType()))
1327 continue;
1328
1329 const SCEV *OperandVal = SE->getSCEV(Op);
1330 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1331 if (!AR)
1332 continue;
1333
1334 // If we find the inner induction after an outer induction e.g.
1335 // for(int i=0;i<N;i++)
1336 // for(int j=0;j<N;j++)
1337 // A[i][j] = A[i-1][j-1]+k;
1338 // then it is a good order.
1339 if (AR->getLoop() == InnerLoop) {
1340 // We found an InnerLoop induction after OuterLoop induction. It is
1341 // a good order.
1342 FoundInnerInduction = true;
1343 if (FoundOuterInduction) {
1344 GoodOrder++;
1345 break;
1346 }
1347 }
1348 // If we find the outer induction after an inner induction e.g.
1349 // for(int i=0;i<N;i++)
1350 // for(int j=0;j<N;j++)
1351 // A[j][i] = A[j-1][i-1]+k;
1352 // then it is a bad order.
1353 if (AR->getLoop() == OuterLoop) {
1354 // We found an OuterLoop induction after InnerLoop induction. It is
1355 // a bad order.
1356 FoundOuterInduction = true;
1357 if (FoundInnerInduction) {
1358 BadOrder++;
1359 break;
1360 }
1361 }
1362 }
1363 }
1364 }
1365 }
1366 return GoodOrder - BadOrder;
1367}
1368
1369std::optional<bool>
1370LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1371 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC) {
1372 // This is the new cost model returned from loop cache analysis.
1373 // A smaller index means the loop should be placed an outer loop, and vice
1374 // versa.
1375 auto InnerLoopIt = CostMap.find(InnerLoop);
1376 if (InnerLoopIt == CostMap.end())
1377 return std::nullopt;
1378 auto OuterLoopIt = CostMap.find(OuterLoop);
1379 if (OuterLoopIt == CostMap.end())
1380 return std::nullopt;
1381
1382 if (CC->getLoopCost(*OuterLoop) == CC->getLoopCost(*InnerLoop))
1383 return std::nullopt;
1384 unsigned InnerIndex = InnerLoopIt->second;
1385 unsigned OuterIndex = OuterLoopIt->second;
1386 LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex
1387 << ", OuterIndex = " << OuterIndex << "\n");
1388 assert(InnerIndex != OuterIndex && "CostMap should assign unique "
1389 "numbers to each loop");
1390 return std::optional<bool>(InnerIndex < OuterIndex);
1391}
1392
1393std::optional<bool>
1394LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1395 // Legacy cost model: this is rough cost estimation algorithm. It counts the
1396 // good and bad order of induction variables in the instruction and allows
1397 // reordering if number of bad orders is more than good.
1398 int Cost = getInstrOrderCost();
1399 LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
1401 return std::optional<bool>(true);
1402
1403 return std::nullopt;
1404}
1405
1406/// Return true if we can vectorize the loop specified by \p LoopId.
1407static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId) {
1408 for (const auto &Dep : DepMatrix) {
1409 char Dir = Dep[LoopId];
1410 char DepType = Dep.back();
1411 assert((DepType == '<' || DepType == '*') &&
1412 "Unexpected element in dependency vector");
1413
1414 // There are no loop-carried dependencies.
1415 if (Dir == '=' || Dir == 'I')
1416 continue;
1417
1418 // DepType being '<' means that this direction vector represents a forward
1419 // dependency. In principle, a loop with '<' direction can be vectorized in
1420 // this case.
1421 if (Dir == '<' && DepType == '<')
1422 continue;
1423
1424 // We cannot prove that the loop is vectorizable.
1425 return false;
1426 }
1427 return true;
1428}
1429
1430std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1431 unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) {
1432 // If the outer loop cannot be vectorized, it is not profitable to move this
1433 // to inner position.
1434 if (!canVectorize(DepMatrix, OuterLoopId))
1435 return false;
1436
1437 // If the inner loop cannot be vectorized but the outer loop can be, then it
1438 // is profitable to interchange to enable inner loop parallelism.
1439 if (!canVectorize(DepMatrix, InnerLoopId))
1440 return true;
1441
1442 // If both the inner and the outer loop can be vectorized, it is necessary to
1443 // check the cost of each vectorized loop for profitability decision. At this
1444 // time we do not have a cost model to estimate them, so return nullopt.
1445 // TODO: Estimate the cost of vectorized loop when both the outer and the
1446 // inner loop can be vectorized.
1447 return std::nullopt;
1448}
1449
1450bool LoopInterchangeProfitability::isProfitable(
1451 const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1452 unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
1453
1454 // Return true if interchange is forced and the cost-model ignored.
1455 if (Profitabilities.size() == 1 && Profitabilities[0] == RuleTy::Ignore)
1456 return true;
1458 "Duplicate rules and option 'ignore' are not allowed");
1459
1460 // isProfitable() is structured to avoid endless loop interchange. If the
1461 // highest priority rule (isProfitablePerLoopCacheAnalysis by default) could
1462 // decide the profitability then, profitability check will stop and return the
1463 // analysis result. If it failed to determine it (e.g., cache analysis failed
1464 // to analyze the loopnest due to delinearization issues) then go ahead the
1465 // second highest priority rule (isProfitablePerInstrOrderCost by default).
1466 // Likewise, if it failed to analysis the profitability then only, the last
1467 // rule (isProfitableForVectorization by default) will decide.
1468 std::optional<bool> shouldInterchange;
1469 for (RuleTy RT : Profitabilities) {
1470 switch (RT) {
1471 case RuleTy::PerLoopCacheAnalysis: {
1472 CacheCost *CC = CCM.getCacheCost();
1473 const DenseMap<const Loop *, unsigned> &CostMap = CCM.getCostMap();
1474 shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
1475 break;
1476 }
1477 case RuleTy::PerInstrOrderCost:
1478 shouldInterchange = isProfitablePerInstrOrderCost();
1479 break;
1480 case RuleTy::ForVectorization:
1481 shouldInterchange =
1482 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1483 break;
1484 case RuleTy::Ignore:
1485 llvm_unreachable("Option 'ignore' is not supported with other options");
1486 break;
1487 }
1488
1489 // If this rule could determine the profitability, don't call subsequent
1490 // rules.
1491 if (shouldInterchange.has_value())
1492 break;
1493 }
1494
1495 if (!shouldInterchange.has_value()) {
1496 ORE->emit([&]() {
1497 return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1498 InnerLoop->getStartLoc(),
1499 InnerLoop->getHeader())
1500 << "Insufficient information to calculate the cost of loop for "
1501 "interchange.";
1502 });
1503 return false;
1504 } else if (!shouldInterchange.value()) {
1505 ORE->emit([&]() {
1506 return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1507 InnerLoop->getStartLoc(),
1508 InnerLoop->getHeader())
1509 << "Interchanging loops is not considered to improve cache "
1510 "locality nor vectorization.";
1511 });
1512 return false;
1513 }
1514 return true;
1515}
1516
1517void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1518 Loop *InnerLoop) {
1519 for (Loop *L : *OuterLoop)
1520 if (L == InnerLoop) {
1521 OuterLoop->removeChildLoop(L);
1522 return;
1523 }
1524 llvm_unreachable("Couldn't find loop");
1525}
1526
1527/// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1528/// new inner and outer loop after interchanging: NewInner is the original
1529/// outer loop and NewOuter is the original inner loop.
1530///
1531/// Before interchanging, we have the following structure
1532/// Outer preheader
1533// Outer header
1534// Inner preheader
1535// Inner header
1536// Inner body
1537// Inner latch
1538// outer bbs
1539// Outer latch
1540//
1541// After interchanging:
1542// Inner preheader
1543// Inner header
1544// Outer preheader
1545// Outer header
1546// Inner body
1547// outer bbs
1548// Outer latch
1549// Inner latch
1550void LoopInterchangeTransform::restructureLoops(
1551 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1552 BasicBlock *OrigOuterPreHeader) {
1553 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1554 // The original inner loop preheader moves from the new inner loop to
1555 // the parent loop, if there is one.
1556 NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1557 LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1558
1559 // Switch the loop levels.
1560 if (OuterLoopParent) {
1561 // Remove the loop from its parent loop.
1562 removeChildLoop(OuterLoopParent, NewInner);
1563 removeChildLoop(NewInner, NewOuter);
1564 OuterLoopParent->addChildLoop(NewOuter);
1565 } else {
1566 removeChildLoop(NewInner, NewOuter);
1567 LI->changeTopLevelLoop(NewInner, NewOuter);
1568 }
1569 while (!NewOuter->isInnermost())
1570 NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1571 NewOuter->addChildLoop(NewInner);
1572
1573 // BBs from the original inner loop.
1574 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1575
1576 // Add BBs from the original outer loop to the original inner loop (excluding
1577 // BBs already in inner loop)
1578 for (BasicBlock *BB : NewInner->blocks())
1579 if (LI->getLoopFor(BB) == NewInner)
1580 NewOuter->addBlockEntry(BB);
1581
1582 // Now remove inner loop header and latch from the new inner loop and move
1583 // other BBs (the loop body) to the new inner loop.
1584 BasicBlock *OuterHeader = NewOuter->getHeader();
1585 BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1586 for (BasicBlock *BB : OrigInnerBBs) {
1587 // Nothing will change for BBs in child loops.
1588 if (LI->getLoopFor(BB) != NewOuter)
1589 continue;
1590 // Remove the new outer loop header and latch from the new inner loop.
1591 if (BB == OuterHeader || BB == OuterLatch)
1592 NewInner->removeBlockFromLoop(BB);
1593 else
1594 LI->changeLoopFor(BB, NewInner);
1595 }
1596
1597 // The preheader of the original outer loop becomes part of the new
1598 // outer loop.
1599 NewOuter->addBlockEntry(OrigOuterPreHeader);
1600 LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1601
1602 // Tell SE that we move the loops around.
1603 SE->forgetLoop(NewOuter);
1604}
1605
1606bool LoopInterchangeTransform::transform(
1607 ArrayRef<Instruction *> DropNoWrapInsts) {
1608 bool Transformed = false;
1609
1610 if (InnerLoop->getSubLoops().empty()) {
1611 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1612 LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
1613 auto &InductionPHIs = LIL.getInnerLoopInductions();
1614 if (InductionPHIs.empty()) {
1615 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1616 return false;
1617 }
1618
1619 SmallVector<Instruction *, 8> InnerIndexVarList;
1620 for (PHINode *CurInductionPHI : InductionPHIs) {
1621 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1622 InnerIndexVarList.push_back(
1623 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1624 else
1625 InnerIndexVarList.push_back(
1626 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1627 }
1628
1629 // Create a new latch block for the inner loop. We split at the
1630 // current latch's terminator and then move the condition and all
1631 // operands that are not either loop-invariant or the induction PHI into the
1632 // new latch block.
1633 BasicBlock *NewLatch =
1634 SplitBlock(InnerLoop->getLoopLatch(),
1635 InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
1636
1638 unsigned i = 0;
1639 auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
1640 for (; i < WorkList.size(); i++) {
1641 // Duplicate instruction and move it the new latch. Update uses that
1642 // have been moved.
1643 Instruction *NewI = WorkList[i]->clone();
1644 NewI->insertBefore(NewLatch->getFirstNonPHIIt());
1645 assert(!NewI->mayHaveSideEffects() &&
1646 "Moving instructions with side-effects may change behavior of "
1647 "the loop nest!");
1648 for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
1649 Instruction *UserI = cast<Instruction>(U.getUser());
1650 if (!InnerLoop->contains(UserI->getParent()) ||
1651 UserI->getParent() == NewLatch ||
1652 llvm::is_contained(InductionPHIs, UserI))
1653 U.set(NewI);
1654 }
1655 // Add operands of moved instruction to the worklist, except if they are
1656 // outside the inner loop or are the induction PHI.
1657 for (Value *Op : WorkList[i]->operands()) {
1658 Instruction *OpI = dyn_cast<Instruction>(Op);
1659 if (!OpI ||
1660 this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
1661 llvm::is_contained(InductionPHIs, OpI))
1662 continue;
1663 WorkList.insert(OpI);
1664 }
1665 }
1666 };
1667
1668 // FIXME: Should we interchange when we have a constant condition?
1669 Instruction *CondI = dyn_cast<Instruction>(
1670 cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
1671 ->getCondition());
1672 if (CondI)
1673 WorkList.insert(CondI);
1674 MoveInstructions();
1675 for (Instruction *InnerIndexVar : InnerIndexVarList)
1676 WorkList.insert(cast<Instruction>(InnerIndexVar));
1677 MoveInstructions();
1678 }
1679
1680 // Ensure the inner loop phi nodes have a separate basic block.
1681 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1682 if (&*InnerLoopHeader->getFirstNonPHIIt() !=
1683 InnerLoopHeader->getTerminator()) {
1684 SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHIIt(), DT, LI);
1685 LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
1686 }
1687
1688 // Instructions in the original inner loop preheader may depend on values
1689 // defined in the outer loop header. Move them there, because the original
1690 // inner loop preheader will become the entry into the interchanged loop nest.
1691 // Currently we move all instructions and rely on LICM to move invariant
1692 // instructions outside the loop nest.
1693 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1694 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1695 if (InnerLoopPreHeader != OuterLoopHeader) {
1696 for (Instruction &I :
1697 make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1698 std::prev(InnerLoopPreHeader->end()))))
1699 I.moveBeforePreserving(OuterLoopHeader->getTerminator()->getIterator());
1700 }
1701
1702 Transformed |= adjustLoopLinks();
1703 if (!Transformed) {
1704 LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
1705 return false;
1706 }
1707
1708 // Finally, drop the nsw/nuw flags from the instructions for reduction
1709 // calculations.
1710 for (Instruction *Reduction : DropNoWrapInsts) {
1711 Reduction->setHasNoSignedWrap(false);
1712 Reduction->setHasNoUnsignedWrap(false);
1713 }
1714
1715 return true;
1716}
1717
1718/// \brief Move all instructions except the terminator from FromBB right before
1719/// InsertBefore
1720static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1721 BasicBlock *ToBB = InsertBefore->getParent();
1722
1723 ToBB->splice(InsertBefore->getIterator(), FromBB, FromBB->begin(),
1724 FromBB->getTerminator()->getIterator());
1725}
1726
1727/// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
1728static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
1729 // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
1730 // from BB1 afterwards.
1731 auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
1732 SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
1733 for (Instruction *I : TempInstrs)
1734 I->removeFromParent();
1735
1736 // Move instructions from BB2 to BB1.
1737 moveBBContents(BB2, BB1->getTerminator());
1738
1739 // Move instructions from TempInstrs to BB2.
1740 for (Instruction *I : TempInstrs)
1741 I->insertBefore(BB2->getTerminator()->getIterator());
1742}
1743
1744// Update BI to jump to NewBB instead of OldBB. Records updates to the
1745// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1746// \p OldBB is exactly once in BI's successor list.
1747static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1748 BasicBlock *NewBB,
1749 std::vector<DominatorTree::UpdateType> &DTUpdates,
1750 bool MustUpdateOnce = true) {
1751 assert((!MustUpdateOnce || llvm::count(successors(BI), OldBB) == 1) &&
1752 "BI must jump to OldBB exactly once.");
1753 bool Changed = false;
1754 for (Use &Op : BI->operands())
1755 if (Op == OldBB) {
1756 Op.set(NewBB);
1757 Changed = true;
1758 }
1759
1760 if (Changed) {
1761 DTUpdates.push_back(
1762 {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1763 DTUpdates.push_back(
1764 {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1765 }
1766 assert(Changed && "Expected a successor to be updated");
1767}
1768
1769// Move Lcssa PHIs to the right place.
1770static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
1771 BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1772 BasicBlock *OuterLatch, BasicBlock *OuterExit,
1773 Loop *InnerLoop, LoopInfo *LI) {
1774
1775 // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
1776 // defined either in the header or latch. Those blocks will become header and
1777 // latch of the new outer loop, and the only possible users can PHI nodes
1778 // in the exit block of the loop nest or the outer loop header (reduction
1779 // PHIs, in that case, the incoming value must be defined in the inner loop
1780 // header). We can just substitute the user with the incoming value and remove
1781 // the PHI.
1782 for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
1783 assert(P.getNumIncomingValues() == 1 &&
1784 "Only loops with a single exit are supported!");
1785
1786 // Incoming values are guaranteed be instructions currently.
1787 auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
1788 // In case of multi-level nested loops, follow LCSSA to find the incoming
1789 // value defined from the innermost loop.
1790 auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
1791 // Skip phis with incoming values from the inner loop body, excluding the
1792 // header and latch.
1793 if (IncIInnerMost->getParent() != InnerLatch &&
1794 IncIInnerMost->getParent() != InnerHeader)
1795 continue;
1796
1797 assert(all_of(P.users(),
1798 [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
1799 return (cast<PHINode>(U)->getParent() == OuterHeader &&
1800 IncI->getParent() == InnerHeader) ||
1801 cast<PHINode>(U)->getParent() == OuterExit;
1802 }) &&
1803 "Can only replace phis iff the uses are in the loop nest exit or "
1804 "the incoming value is defined in the inner header (it will "
1805 "dominate all loop blocks after interchanging)");
1806 P.replaceAllUsesWith(IncI);
1807 P.eraseFromParent();
1808 }
1809
1810 SmallVector<PHINode *, 8> LcssaInnerExit(
1811 llvm::make_pointer_range(InnerExit->phis()));
1812
1813 SmallVector<PHINode *, 8> LcssaInnerLatch(
1814 llvm::make_pointer_range(InnerLatch->phis()));
1815
1816 // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1817 // If a PHI node has users outside of InnerExit, it has a use outside the
1818 // interchanged loop and we have to preserve it. We move these to
1819 // InnerLatch, which will become the new exit block for the innermost
1820 // loop after interchanging.
1821 for (PHINode *P : LcssaInnerExit)
1822 P->moveBefore(InnerLatch->getFirstNonPHIIt());
1823
1824 // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1825 // and we have to move them to the new inner latch.
1826 for (PHINode *P : LcssaInnerLatch)
1827 P->moveBefore(InnerExit->getFirstNonPHIIt());
1828
1829 // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1830 // incoming values defined in the outer loop, we have to add a new PHI
1831 // in the inner loop latch, which became the exit block of the outer loop,
1832 // after interchanging.
1833 if (OuterExit) {
1834 for (PHINode &P : OuterExit->phis()) {
1835 if (P.getNumIncomingValues() != 1)
1836 continue;
1837 // Skip Phis with incoming values defined in the inner loop. Those should
1838 // already have been updated.
1839 auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1840 if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
1841 continue;
1842
1843 PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
1844 NewPhi->setIncomingValue(0, P.getIncomingValue(0));
1845 NewPhi->setIncomingBlock(0, OuterLatch);
1846 // We might have incoming edges from other BBs, i.e., the original outer
1847 // header.
1848 for (auto *Pred : predecessors(InnerLatch)) {
1849 if (Pred == OuterLatch)
1850 continue;
1851 NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1852 }
1853 NewPhi->insertBefore(InnerLatch->getFirstNonPHIIt());
1854 P.setIncomingValue(0, NewPhi);
1855 }
1856 }
1857
1858 // Now adjust the incoming blocks for the LCSSA PHIs.
1859 // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1860 // with the new latch.
1861 InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
1862}
1863
1864bool LoopInterchangeTransform::adjustLoopBranches() {
1865 LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1866 std::vector<DominatorTree::UpdateType> DTUpdates;
1867
1868 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1869 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1870
1871 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1872 InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1873 InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1874 // Ensure that both preheaders do not contain PHI nodes and have single
1875 // predecessors. This allows us to move them easily. We use
1876 // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1877 // preheaders do not satisfy those conditions.
1878 if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1879 !OuterLoopPreHeader->getUniquePredecessor())
1880 OuterLoopPreHeader =
1881 InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
1882 if (InnerLoopPreHeader == OuterLoop->getHeader())
1883 InnerLoopPreHeader =
1884 InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
1885
1886 // Adjust the loop preheader
1887 BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1888 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1889 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1890 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1891 BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1892 BasicBlock *InnerLoopLatchPredecessor =
1893 InnerLoopLatch->getUniquePredecessor();
1894 BasicBlock *InnerLoopLatchSuccessor;
1895 BasicBlock *OuterLoopLatchSuccessor;
1896
1897 BranchInst *OuterLoopLatchBI =
1898 dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1899 BranchInst *InnerLoopLatchBI =
1900 dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1901 BranchInst *OuterLoopHeaderBI =
1902 dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1903 BranchInst *InnerLoopHeaderBI =
1904 dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1905
1906 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1907 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1908 !InnerLoopHeaderBI)
1909 return false;
1910
1911 BranchInst *InnerLoopLatchPredecessorBI =
1912 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1913 BranchInst *OuterLoopPredecessorBI =
1914 dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1915
1916 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1917 return false;
1918 BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1919 if (!InnerLoopHeaderSuccessor)
1920 return false;
1921
1922 // Adjust Loop Preheader and headers.
1923 // The branches in the outer loop predecessor and the outer loop header can
1924 // be unconditional branches or conditional branches with duplicates. Consider
1925 // this when updating the successors.
1926 updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1927 InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1928 // The outer loop header might or might not branch to the outer latch.
1929 // We are guaranteed to branch to the inner loop preheader.
1930 if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1931 // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1932 updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1933 DTUpdates,
1934 /*MustUpdateOnce=*/false);
1935 }
1936 updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1937 InnerLoopHeaderSuccessor, DTUpdates,
1938 /*MustUpdateOnce=*/false);
1939
1940 // Adjust reduction PHI's now that the incoming block has changed.
1941 InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
1942 OuterLoopHeader);
1943
1944 updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1945 OuterLoopPreHeader, DTUpdates);
1946
1947 // -------------Adjust loop latches-----------
1948 if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1949 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1950 else
1951 InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1952
1953 updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1954 InnerLoopLatchSuccessor, DTUpdates);
1955
1956 if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1957 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1958 else
1959 OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1960
1961 updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1962 OuterLoopLatchSuccessor, DTUpdates);
1963 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1964 DTUpdates);
1965
1966 DT->applyUpdates(DTUpdates);
1967 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1968 OuterLoopPreHeader);
1969
1970 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1971 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1972 InnerLoop, LI);
1973 // For PHIs in the exit block of the outer loop, outer's latch has been
1974 // replaced by Inners'.
1975 OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
1976
1977 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1978 // Now update the reduction PHIs in the inner and outer loop headers.
1979 SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1980 for (PHINode &PHI : InnerLoopHeader->phis())
1981 if (OuterInnerReductions.contains(&PHI))
1982 InnerLoopPHIs.push_back(&PHI);
1983
1984 for (PHINode &PHI : OuterLoopHeader->phis())
1985 if (OuterInnerReductions.contains(&PHI))
1986 OuterLoopPHIs.push_back(&PHI);
1987
1988 // Now move the remaining reduction PHIs from outer to inner loop header and
1989 // vice versa. The PHI nodes must be part of a reduction across the inner and
1990 // outer loop and all the remains to do is and updating the incoming blocks.
1991 for (PHINode *PHI : OuterLoopPHIs) {
1992 LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump(););
1993 PHI->moveBefore(InnerLoopHeader->getFirstNonPHIIt());
1994 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
1995 }
1996 for (PHINode *PHI : InnerLoopPHIs) {
1997 LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump(););
1998 PHI->moveBefore(OuterLoopHeader->getFirstNonPHIIt());
1999 assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
2000 }
2001
2002 // Update the incoming blocks for moved PHI nodes.
2003 OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
2004 OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
2005 InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
2006 InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
2007
2008 // Values defined in the outer loop header could be used in the inner loop
2009 // latch. In that case, we need to create LCSSA phis for them, because after
2010 // interchanging they will be defined in the new inner loop and used in the
2011 // new outer loop.
2012 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
2013 for (Instruction &I :
2014 make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
2015 MayNeedLCSSAPhis.push_back(&I);
2016 formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE);
2017
2018 return true;
2019}
2020
2021bool LoopInterchangeTransform::adjustLoopLinks() {
2022 // Adjust all branches in the inner and outer loop.
2023 bool Changed = adjustLoopBranches();
2024 if (Changed) {
2025 // We have interchanged the preheaders so we need to interchange the data in
2026 // the preheaders as well. This is because the content of the inner
2027 // preheader was previously executed inside the outer loop.
2028 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
2029 BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
2030 swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
2031 }
2032 return Changed;
2033}
2034
2038 LPMUpdater &U) {
2039 Function &F = *LN.getParent();
2040 SmallVector<Loop *, 8> LoopList(LN.getLoops());
2041
2042 if (MaxMemInstrCount < 1) {
2043 LLVM_DEBUG(dbgs() << "MaxMemInstrCount should be at least 1");
2044 return PreservedAnalyses::all();
2045 }
2047
2048 // Ensure minimum depth of the loop nest to do the interchange.
2049 if (!hasSupportedLoopDepth(LoopList, ORE))
2050 return PreservedAnalyses::all();
2051 // Ensure computable loop nest.
2052 if (!isComputableLoopNest(&AR.SE, LoopList)) {
2053 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
2054 return PreservedAnalyses::all();
2055 }
2056
2057 ORE.emit([&]() {
2058 return OptimizationRemarkAnalysis(DEBUG_TYPE, "Dependence",
2061 << "Computed dependence info, invoking the transform.";
2062 });
2063
2064 DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
2065 if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &AR, &ORE).run(LN))
2066 return PreservedAnalyses::all();
2067 U.markLoopNestChanged(true);
2069}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the StringMap class.
Rewrite undef for PHI
ReachingDefAnalysis InstSet InstSet & Ignore
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:687
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
bool End
Definition: ELF_riscv.cpp:480
#define DEBUG_TYPE
Hexagon Common GEP
This file defines the interface for the loop cache analysis.
static cl::opt< unsigned int > MaxMemInstrCount("loop-interchange-max-meminstr-count", cl::init(64), cl::Hidden, cl::desc("Maximum number of load-store instructions that should be handled " "in the dependency matrix. Higher value may lead to more interchanges " "at the cost of compile-time"))
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static PHINode * findInnerReductionPhi(Loop *L, Value *V, SmallVectorImpl< Instruction * > &HasNoWrapInsts)
static cl::opt< unsigned int > MinLoopNestDepth("loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden, cl::desc("Minimum depth of loop nest considered for the transform"))
static bool isComputableLoopNest(ScalarEvolution *SE, ArrayRef< Loop * > LoopList)
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
static void printDepMatrix(CharMatrix &DepMatrix)
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
static cl::opt< unsigned int > MaxLoopNestDepth("loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden, cl::desc("Maximum depth of loop nest considered for the transform"))
static bool hasSupportedLoopDepth(ArrayRef< Loop * > LoopList, OptimizationRemarkEmitter &ORE)
static bool inThisOrder(const Instruction *Src, const Instruction *Dst)
Return true if Src appears before Dst in the same basic block.
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId)
Return true if we can vectorize the loop specified by LoopId.
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
#define DEBUG_TYPE
static Value * followLCSSA(Value *SV)
static void populateWorklist(Loop &L, LoopVector &LoopList)
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
static std::optional< bool > isLexicographicallyPositive(ArrayRef< char > DV, unsigned Begin, unsigned End)
static cl::list< RuleTy > Profitabilities("loop-interchange-profitabilities", cl::ZeroOrMore, cl::MiscFlags::CommaSeparated, cl::Hidden, cl::desc("List of profitability heuristics to be used. They are applied in " "the given order"), cl::list_init< RuleTy >({RuleTy::PerLoopCacheAnalysis, RuleTy::PerInstrOrderCost, RuleTy::ForVectorization}), cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache", "Prioritize loop cache cost"), clEnumValN(RuleTy::PerInstrOrderCost, "instorder", "Prioritize the IVs order of each instruction"), clEnumValN(RuleTy::ForVectorization, "vectorize", "Prioritize vectorization"), clEnumValN(RuleTy::Ignore, "ignore", "Ignore profitability, force interchange (does not " "work with other options)")))
static bool noDuplicateRulesAndIgnore(ArrayRef< RuleTy > Rules)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
loop Loop Strength Reduction
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
#define P(N)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:191
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator end()
Definition: BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:528
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:337
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:475
LLVM_ABI void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
Definition: BasicBlock.cpp:635
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:445
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:662
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:666
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
iterator end()
Definition: DenseMap.h:87
DependenceInfo - This class is the main dependence-analysis driver.
LLVM_ABI std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions=false)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:949
A struct for saving information about induction variables.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
iterator begin() const
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
bool isLoopInvariant(const Value *V, bool HasCoroSuspendInst=false) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:644
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:90
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
unsigned getOpcode() const
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:104
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:133
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
Definition: StringMap.h:372
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:154
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
op_range operands()
Definition: User.h:292
LLVM Value Representation.
Definition: Value.h:75
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:712
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2491
auto successors(const MachineBasicBlock *BB)
LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:449
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:386
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1987
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:345
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:34
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1973
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition: LCSSA.cpp:308
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition: iterator.h:363
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:217