LLVM 22.0.0git
LoopPeel.cpp
Go to the documentation of this file.
1//===- LoopPeel.cpp -------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Loop Peeling Utilities.
10//===----------------------------------------------------------------------===//
11
13#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/Statistic.h"
16#include "llvm/Analysis/Loads.h"
23#include "llvm/IR/BasicBlock.h"
24#include "llvm/IR/Dominators.h"
25#include "llvm/IR/Function.h"
26#include "llvm/IR/InstrTypes.h"
27#include "llvm/IR/Instruction.h"
29#include "llvm/IR/LLVMContext.h"
30#include "llvm/IR/MDBuilder.h"
35#include "llvm/Support/Debug.h"
43#include <algorithm>
44#include <cassert>
45#include <cstdint>
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::PatternMatch;
50using namespace llvm::SCEVPatternMatch;
51
52#define DEBUG_TYPE "loop-peel"
53
54STATISTIC(NumPeeled, "Number of loops peeled");
55STATISTIC(NumPeeledEnd, "Number of loops peeled from end");
56
58 "unroll-peel-count", cl::Hidden,
59 cl::desc("Set the unroll peeling count, for testing purposes"));
60
61static cl::opt<bool>
62 UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
63 cl::desc("Allows loops to be peeled when the dynamic "
64 "trip count is known to be low."));
65
66static cl::opt<bool>
67 UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
68 cl::init(false), cl::Hidden,
69 cl::desc("Allows loop nests to be peeled."));
70
72 "unroll-peel-max-count", cl::init(7), cl::Hidden,
73 cl::desc("Max average trip count which will cause loop peeling."));
74
76 "unroll-force-peel-count", cl::init(0), cl::Hidden,
77 cl::desc("Force a peel count regardless of profiling information."));
78
80 "disable-advanced-peeling", cl::init(false), cl::Hidden,
82 "Disable advance peeling. Issues for convergent targets (D134803)."));
83
85 "enable-peeling-for-iv", cl::init(false), cl::Hidden,
86 cl::desc("Enable peeling to convert Phi nodes into IVs"));
87
88static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
89
90// Check whether we are capable of peeling this loop.
91bool llvm::canPeel(const Loop *L) {
92 // Make sure the loop is in simplified form
93 if (!L->isLoopSimplifyForm())
94 return false;
96 return true;
97
99 L->getUniqueNonLatchExitBlocks(Exits);
100 // The latch must either be the only exiting block or all non-latch exit
101 // blocks have either a deopt or unreachable terminator or compose a chain of
102 // blocks where the last one is either deopt or unreachable terminated. Both
103 // deopt and unreachable terminators are a strong indication they are not
104 // taken. Note that this is a profitability check, not a legality check. Also
105 // note that LoopPeeling currently can only update the branch weights of latch
106 // blocks and branch weights to blocks with deopt or unreachable do not need
107 // updating.
109}
110
111namespace {
112
113// As a loop is peeled, it may be the case that Phi nodes become
114// loop-invariant (ie, known because there is only one choice).
115// For example, consider the following function:
116// void g(int);
117// void binary() {
118// int x = 0;
119// int y = 0;
120// int a = 0;
121// for(int i = 0; i <100000; ++i) {
122// g(x);
123// x = y;
124// g(a);
125// y = a + 1;
126// a = 5;
127// }
128// }
129// Peeling 3 iterations is beneficial because the values for x, y and a
130// become known. The IR for this loop looks something like the following:
131//
132// %i = phi i32 [ 0, %entry ], [ %inc, %if.end ]
133// %a = phi i32 [ 0, %entry ], [ 5, %if.end ]
134// %y = phi i32 [ 0, %entry ], [ %add, %if.end ]
135// %x = phi i32 [ 0, %entry ], [ %y, %if.end ]
136// ...
137// tail call void @_Z1gi(i32 signext %x)
138// tail call void @_Z1gi(i32 signext %a)
139// %add = add nuw nsw i32 %a, 1
140// %inc = add nuw nsw i32 %i, 1
141// %exitcond = icmp eq i32 %inc, 100000
142// br i1 %exitcond, label %for.cond.cleanup, label %for.body
143//
144// The arguments for the calls to g will become known after 3 iterations
145// of the loop, because the phi nodes values become known after 3 iterations
146// of the loop (ie, they are known on the 4th iteration, so peel 3 iterations).
147// The first iteration has g(0), g(0); the second has g(0), g(5); the
148// third has g(1), g(5) and the fourth (and all subsequent) have g(6), g(5).
149// Now consider the phi nodes:
150// %a is a phi with constants so it is determined after iteration 1.
151// %y is a phi based on a constant and %a so it is determined on
152// the iteration after %a is determined, so iteration 2.
153// %x is a phi based on a constant and %y so it is determined on
154// the iteration after %y, so iteration 3.
155// %i is based on itself (and is an induction variable) so it is
156// never determined.
157// This means that peeling off 3 iterations will result in being able to
158// remove the phi nodes for %a, %y, and %x. The arguments for the
159// corresponding calls to g are determined and the code for computing
160// x, y, and a can be removed.
161//
162// Similarly, there are cases where peeling makes Phi nodes loop-inductions
163// (i.e., the value is increased or decreased by a fixed amount on every
164// iteration). For example, consider the following function.
165//
166// #define N 100
167// void f(int a[], int b[]) {
168// int im = N - 1;
169// for (int i = 0; i < N; i++) {
170// a[i] = b[i] + b[im];
171// im = i;
172// }
173// }
174//
175// The IR of the loop will look something like the following.
176//
177// %i = phi i32 [ 0, %entry ], [ %i.next, %for.body ]
178// %im = phi i32 [ 99, %entry ], [ %i, %for.body ]
179// ...
180// %i.next = add nuw nsw i32 %i, 1
181// ...
182//
183// In this case, %im becomes a loop-induction variable by peeling 1 iteration,
184// because %i is a loop-induction one. The peeling count can be determined by
185// the same algorithm with loop-invariant case. Such peeling is profitable for
186// loop-vectorization.
187//
188// The PhiAnalyzer class calculates how many times a loop should be
189// peeled based on the above analysis of the phi nodes in the loop while
190// respecting the maximum specified.
191class PhiAnalyzer {
192public:
193 PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV);
194
195 // Calculate the sufficient minimum number of iterations of the loop to peel
196 // such that phi instructions become determined (subject to allowable limits)
197 std::optional<unsigned> calculateIterationsToPeel();
198
199protected:
200 enum class PeelCounterType {
201 Invariant,
202 Induction,
203 };
204
205 using PeelCounterValue = std::pair<unsigned, PeelCounterType>;
206 using PeelCounter = std::optional<PeelCounterValue>;
207 const PeelCounter Unknown = std::nullopt;
208
209 // Add 1 respecting Unknown and return Unknown if result over MaxIterations
210 PeelCounter addOne(PeelCounter PC) const {
211 if (PC == Unknown)
212 return Unknown;
213 auto [Val, Ty] = *PC;
214 return (Val + 1 <= MaxIterations) ? PeelCounter({Val + 1, Ty}) : Unknown;
215 }
216
217 // Return a value representing zero for the given counter type.
218 PeelCounter makeZero(PeelCounterType Ty) const {
219 return PeelCounter({0, Ty});
220 }
221
222 // Calculate the number of iterations after which the given value becomes an
223 // invariant or an induction.
224 PeelCounter calculate(const Value &);
225
226 // Auxiliary function to calculate the number of iterations for a comparison
227 // instruction or a binary operator.
228 PeelCounter mergeTwoCounters(const Instruction &CmpOrBinaryOp,
229 const PeelCounterValue &LHS,
230 const PeelCounterValue &RHS) const;
231
232 // Returns true if the \p Phi is an induction in the target loop. This is a
233 // lightweight check and possible to detect an IV in some cases.
234 bool isInductionPHI(const PHINode *Phi) const;
235
236 const Loop &L;
237 const unsigned MaxIterations;
238 const bool PeelForIV;
239
240 // Map of Values to number of iterations to invariance or induction
241 SmallDenseMap<const Value *, PeelCounter> IterationsToInvarianceOrInduction;
242};
243
244PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV)
245 : L(L), MaxIterations(MaxIterations), PeelForIV(PeelForIV) {
246 assert(canPeel(&L) && "loop is not suitable for peeling");
247 assert(MaxIterations > 0 && "no peeling is allowed?");
248}
249
250/// Test whether \p Phi is an induction variable. Although this can be
251/// determined using SCEV analysis, it is expensive to compute here. Instead,
252/// we perform cheaper checks that may not detect complex cases but are
253/// sufficient for some situations.
254bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
255 // Currently we only support a loop that has single latch.
256 BasicBlock *Latch = L.getLoopLatch();
257 if (Latch == nullptr)
258 return false;
259
260 Value *Cur = Phi->getIncomingValueForBlock(Latch);
262 bool VisitBinOp = false;
263
264 // Starting from the incoming value of the Phi, we follow the use-def chain.
265 // We consider Phi to be an IV if we can reach it again by traversing only
266 // add, sub, or cast instructions.
267 while (true) {
268 if (Cur == Phi)
269 break;
270
271 // Avoid infinite loop.
272 if (!Visited.insert(Cur).second)
273 return false;
274
275 auto *I = dyn_cast<Instruction>(Cur);
276 if (!I || !L.contains(I))
277 return false;
278
279 if (auto *Cast = dyn_cast<CastInst>(I)) {
280 Cur = Cast->getOperand(0);
281 } else if (auto *BinOp = dyn_cast<BinaryOperator>(I)) {
282 if (BinOp->getOpcode() != Instruction::Add &&
283 BinOp->getOpcode() != Instruction::Sub)
284 return false;
285 if (!isa<ConstantInt>(BinOp->getOperand(1)))
286 return false;
287
288 VisitBinOp = true;
289 Cur = BinOp->getOperand(0);
290 } else {
291 return false;
292 }
293 }
294
295 // Ignore cases where no binary operations are visited.
296 return VisitBinOp;
297}
298
299/// When either \p LHS or \p RHS is an IV, the result of \p CmpOrBinaryOp is
300/// considered an IV only if it is an addition or a subtraction. Otherwise the
301/// result can be a value that is neither a loop-invariant nor an IV.
302///
303/// If both \p LHS and \p RHS are loop-invariants, then the result of
304/// \CmpOrBinaryOp is also a loop-invariant.
305PhiAnalyzer::PeelCounter
306PhiAnalyzer::mergeTwoCounters(const Instruction &CmpOrBinaryOp,
307 const PeelCounterValue &LHS,
308 const PeelCounterValue &RHS) const {
309 auto &[LVal, LTy] = LHS;
310 auto &[RVal, RTy] = RHS;
311 unsigned NewVal = std::max(LVal, RVal);
312
313 if (LTy == PeelCounterType::Induction || RTy == PeelCounterType::Induction) {
314 if (const auto *BinOp = dyn_cast<BinaryOperator>(&CmpOrBinaryOp)) {
315 if (BinOp->getOpcode() == Instruction::Add ||
316 BinOp->getOpcode() == Instruction::Sub)
317 return PeelCounter({NewVal, PeelCounterType::Induction});
318 }
319 return Unknown;
320 }
321 return PeelCounter({NewVal, PeelCounterType::Invariant});
322}
323
324// This function calculates the number of iterations after which the value
325// becomes an invariant. The pre-calculated values are memorized in a map.
326// N.B. This number will be Unknown or <= MaxIterations.
327// The function is calculated according to the following definition:
328// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
329// F(%x) = G(%y) + 1 (N.B. [MaxIterations | Unknown] + 1 => Unknown)
330// G(%y) = 0 if %y is a loop invariant
331// G(%y) = G(%BackEdgeValue) if %y is a phi in the header block
332// G(%y) = TODO: if %y is an expression based on phis and loop invariants
333// The example looks like:
334// %x = phi(0, %a) <-- becomes invariant starting from 3rd iteration.
335// %y = phi(0, 5)
336// %a = %y + 1
337// G(%y) = Unknown otherwise (including phi not in header block)
338PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
339 // If we already know the answer, take it from the map.
340 // Otherwise, place Unknown to map to avoid infinite recursion. Such
341 // cycles can never stop on an invariant.
342 auto [I, Inserted] =
343 IterationsToInvarianceOrInduction.try_emplace(&V, Unknown);
344 if (!Inserted)
345 return I->second;
346
347 if (L.isLoopInvariant(&V))
348 // Loop invariant so known at start.
349 return (IterationsToInvarianceOrInduction[&V] =
350 makeZero(PeelCounterType::Invariant));
351 if (const PHINode *Phi = dyn_cast<PHINode>(&V)) {
352 if (Phi->getParent() != L.getHeader()) {
353 // Phi is not in header block so Unknown.
354 assert(IterationsToInvarianceOrInduction[&V] == Unknown &&
355 "unexpected value saved");
356 return Unknown;
357 }
358
359 // If Phi is an induction, register it as a starting point.
360 if (PeelForIV && isInductionPHI(Phi))
361 return (IterationsToInvarianceOrInduction[&V] =
362 makeZero(PeelCounterType::Induction));
363
364 // We need to analyze the input from the back edge and add 1.
365 Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch());
366 PeelCounter Iterations = calculate(*Input);
367 assert(IterationsToInvarianceOrInduction[Input] == Iterations &&
368 "unexpected value saved");
369 return (IterationsToInvarianceOrInduction[Phi] = addOne(Iterations));
370 }
371 if (const Instruction *I = dyn_cast<Instruction>(&V)) {
372 if (isa<CmpInst>(I) || I->isBinaryOp()) {
373 // Binary instructions get the max of the operands.
374 PeelCounter LHS = calculate(*I->getOperand(0));
375 if (LHS == Unknown)
376 return Unknown;
377 PeelCounter RHS = calculate(*I->getOperand(1));
378 if (RHS == Unknown)
379 return Unknown;
380 return (IterationsToInvarianceOrInduction[I] =
381 mergeTwoCounters(*I, *LHS, *RHS));
382 }
383 if (I->isCast())
384 // Cast instructions get the value of the operand.
385 return (IterationsToInvarianceOrInduction[I] =
386 calculate(*I->getOperand(0)));
387 }
388 // TODO: handle more expressions
389
390 // Everything else is Unknown.
391 assert(IterationsToInvarianceOrInduction[&V] == Unknown &&
392 "unexpected value saved");
393 return Unknown;
394}
395
396std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() {
397 unsigned Iterations = 0;
398 for (auto &PHI : L.getHeader()->phis()) {
399 PeelCounter ToInvarianceOrInduction = calculate(PHI);
400 if (ToInvarianceOrInduction != Unknown) {
401 unsigned Val = ToInvarianceOrInduction->first;
402 assert(Val <= MaxIterations && "bad result in phi analysis");
403 Iterations = std::max(Iterations, Val);
404 if (Iterations == MaxIterations)
405 break;
406 }
407 }
408 assert((Iterations <= MaxIterations) && "bad result in phi analysis");
409 return Iterations ? std::optional<unsigned>(Iterations) : std::nullopt;
410}
411
412} // unnamed namespace
413
414// Try to find any invariant memory reads that will become dereferenceable in
415// the remainder loop after peeling. The load must also be used (transitively)
416// by an exit condition. Returns the number of iterations to peel off (at the
417// moment either 0 or 1).
419 DominatorTree &DT,
420 AssumptionCache *AC) {
421 // Skip loops with a single exiting block, because there should be no benefit
422 // for the heuristic below.
423 if (L.getExitingBlock())
424 return 0;
425
426 // All non-latch exit blocks must have an UnreachableInst terminator.
427 // Otherwise the heuristic below may not be profitable.
429 L.getUniqueNonLatchExitBlocks(Exits);
430 if (any_of(Exits, [](const BasicBlock *BB) {
431 return !isa<UnreachableInst>(BB->getTerminator());
432 }))
433 return 0;
434
435 // Now look for invariant loads that dominate the latch and are not known to
436 // be dereferenceable. If there are such loads and no writes, they will become
437 // dereferenceable in the loop if the first iteration is peeled off. Also
438 // collect the set of instructions controlled by such loads. Only peel if an
439 // exit condition uses (transitively) such a load.
440 BasicBlock *Header = L.getHeader();
441 BasicBlock *Latch = L.getLoopLatch();
442 SmallPtrSet<Value *, 8> LoadUsers;
443 const DataLayout &DL = L.getHeader()->getDataLayout();
444 for (BasicBlock *BB : L.blocks()) {
445 for (Instruction &I : *BB) {
446 if (I.mayWriteToMemory())
447 return 0;
448
449 if (LoadUsers.contains(&I))
450 LoadUsers.insert_range(I.users());
451 // Do not look for reads in the header; they can already be hoisted
452 // without peeling.
453 if (BB == Header)
454 continue;
455 if (auto *LI = dyn_cast<LoadInst>(&I)) {
456 Value *Ptr = LI->getPointerOperand();
457 if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) &&
458 !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, AC, &DT))
459 LoadUsers.insert_range(I.users());
460 }
461 }
462 }
463 SmallVector<BasicBlock *> ExitingBlocks;
464 L.getExitingBlocks(ExitingBlocks);
465 if (any_of(ExitingBlocks, [&LoadUsers](BasicBlock *Exiting) {
466 return LoadUsers.contains(Exiting->getTerminator());
467 }))
468 return 1;
469 return 0;
470}
471
473 const SCEV *BTC = SE.getBackedgeTakenCount(&L);
474 if (isa<SCEVCouldNotCompute>(BTC))
475 return false;
476
477 // Check if the exit condition of the loop can be adjusted by the peeling
478 // codegen. For now, it must
479 // * exit via the latch,
480 // * the exit condition must be a NE/EQ compare of an induction with step
481 // of 1 and must only be used by the exiting branch.
482 BasicBlock *Latch = L.getLoopLatch();
483 Value *Inc;
484 Value *Bound;
485 CmpPredicate Pred;
486 BasicBlock *Succ1;
487 BasicBlock *Succ2;
488 return Latch && Latch == L.getExitingBlock() &&
489 match(Latch->getTerminator(),
490 m_Br(m_OneUse(m_ICmp(Pred, m_Value(Inc), m_Value(Bound))),
491 m_BasicBlock(Succ1), m_BasicBlock(Succ2))) &&
492 ((Pred == CmpInst::ICMP_EQ && Succ2 == L.getHeader()) ||
493 (Pred == CmpInst::ICMP_NE && Succ1 == L.getHeader())) &&
494 Bound->getType()->isIntegerTy() &&
495 SE.isLoopInvariant(SE.getSCEV(Bound), &L) &&
496 match(SE.getSCEV(Inc),
498}
499
500/// Returns true if the last iteration can be peeled off and the condition (Pred
501/// LeftAR, RightSCEV) is known at the last iteration and the inverse condition
502/// is known at the second-to-last.
504 const SCEVAddRecExpr *LeftAR,
505 const SCEV *RightSCEV, ScalarEvolution &SE,
506 const TargetTransformInfo &TTI) {
507 if (!canPeelLastIteration(L, SE))
508 return false;
509
510 const SCEV *BTC = SE.getBackedgeTakenCount(&L);
511 SCEVExpander Expander(SE, L.getHeader()->getDataLayout(), "loop-peel");
512 if (!SE.isKnownNonZero(BTC) &&
514 L.getLoopPredecessor()->getTerminator()))
515 return false;
516
517 auto Guards = ScalarEvolution::LoopGuards::collect(&L, SE);
518 BTC = SE.applyLoopGuards(BTC, Guards);
519 RightSCEV = SE.applyLoopGuards(RightSCEV, Guards);
520 const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration(BTC, SE);
521 const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration(
522 SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE);
523
524 return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), ValAtLastIter,
525 RightSCEV) &&
526 SE.isKnownPredicate(Pred, ValAtSecondToLastIter, RightSCEV);
527}
528
529// Return the number of iterations to peel off from the beginning and end of the
530// loop respectively, that make conditions in the body true/false. For example,
531// if we peel 2 iterations off the loop below, the condition i < 2 can be
532// evaluated at compile time.
533//
534// for (i = 0; i < n; i++)
535// if (i < 2)
536// ..
537// else
538// ..
539// }
540static std::pair<unsigned, unsigned>
541countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
542 const TargetTransformInfo &TTI) {
543 assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
544 unsigned DesiredPeelCount = 0;
545 unsigned DesiredPeelCountLast = 0;
546
547 // Do not peel the entire loop.
548 const SCEV *BE = SE.getConstantMaxBackedgeTakenCount(&L);
549 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(BE))
550 MaxPeelCount =
551 std::min((unsigned)SC->getAPInt().getLimitedValue() - 1, MaxPeelCount);
552
553 // Increase PeelCount while (IterVal Pred BoundSCEV) condition is satisfied;
554 // return true if inversed condition become known before reaching the
555 // MaxPeelCount limit.
556 auto PeelWhilePredicateIsKnown =
557 [&](unsigned &PeelCount, const SCEV *&IterVal, const SCEV *BoundSCEV,
558 const SCEV *Step, ICmpInst::Predicate Pred) {
559 while (PeelCount < MaxPeelCount &&
560 SE.isKnownPredicate(Pred, IterVal, BoundSCEV)) {
561 IterVal = SE.getAddExpr(IterVal, Step);
562 ++PeelCount;
563 }
564 return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
565 BoundSCEV);
566 };
567
568 const unsigned MaxDepth = 4;
569 std::function<void(Value *, unsigned)> ComputePeelCount =
570 [&](Value *Condition, unsigned Depth) -> void {
571 if (!Condition->getType()->isIntegerTy() || Depth >= MaxDepth)
572 return;
573
574 Value *LeftVal, *RightVal;
575 if (match(Condition, m_And(m_Value(LeftVal), m_Value(RightVal))) ||
576 match(Condition, m_Or(m_Value(LeftVal), m_Value(RightVal)))) {
577 ComputePeelCount(LeftVal, Depth + 1);
578 ComputePeelCount(RightVal, Depth + 1);
579 return;
580 }
581
582 CmpPredicate Pred;
583 if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
584 return;
585
586 const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
587 const SCEV *RightSCEV = SE.getSCEV(RightVal);
588
589 // Do not consider predicates that are known to be true or false
590 // independently of the loop iteration.
591 if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
592 return;
593
594 // Check if we have a condition with one AddRec and one non AddRec
595 // expression. Normalize LeftSCEV to be the AddRec.
596 if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
597 if (isa<SCEVAddRecExpr>(RightSCEV)) {
598 std::swap(LeftSCEV, RightSCEV);
599 Pred = ICmpInst::getSwappedPredicate(Pred);
600 } else
601 return;
602 }
603
604 const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
605
606 // Avoid huge SCEV computations in the loop below, make sure we only
607 // consider AddRecs of the loop we are trying to peel.
608 if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
609 return;
610 if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
611 !SE.getMonotonicPredicateType(LeftAR, Pred))
612 return;
613
614 // Check if extending the current DesiredPeelCount lets us evaluate Pred
615 // or !Pred in the loop body statically.
616 unsigned NewPeelCount = DesiredPeelCount;
617
618 const SCEV *IterVal = LeftAR->evaluateAtIteration(
619 SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
620
621 // If the original condition is not known, get the negated predicate
622 // (which holds on the else branch) and check if it is known. This allows
623 // us to peel of iterations that make the original condition false.
624 if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
625 Pred = ICmpInst::getInversePredicate(Pred);
626
627 const SCEV *Step = LeftAR->getStepRecurrence(SE);
628 if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step,
629 Pred)) {
630 if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE, TTI))
631 DesiredPeelCountLast = 1;
632 return;
633 }
634
635 // However, for equality comparisons, that isn't always sufficient to
636 // eliminate the comparsion in loop body, we may need to peel one more
637 // iteration. See if that makes !Pred become unknown again.
638 const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
639 if (ICmpInst::isEquality(Pred) &&
640 !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
641 RightSCEV) &&
642 !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
643 SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
644 if (NewPeelCount >= MaxPeelCount)
645 return; // Need to peel one more iteration, but can't. Give up.
646 ++NewPeelCount; // Great!
647 }
648
649 DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
650 DesiredPeelCountLast = std::max(DesiredPeelCountLast, NewPeelCount);
651 };
652
653 auto ComputePeelCountMinMax = [&](MinMaxIntrinsic *MinMax) {
654 if (!MinMax->getType()->isIntegerTy())
655 return;
656 Value *LHS = MinMax->getLHS(), *RHS = MinMax->getRHS();
657 const SCEV *BoundSCEV, *IterSCEV;
658 if (L.isLoopInvariant(LHS)) {
659 BoundSCEV = SE.getSCEV(LHS);
660 IterSCEV = SE.getSCEV(RHS);
661 } else if (L.isLoopInvariant(RHS)) {
662 BoundSCEV = SE.getSCEV(RHS);
663 IterSCEV = SE.getSCEV(LHS);
664 } else
665 return;
666 const auto *AddRec = dyn_cast<SCEVAddRecExpr>(IterSCEV);
667 // For simplicity, we support only affine recurrences.
668 if (!AddRec || !AddRec->isAffine() || AddRec->getLoop() != &L)
669 return;
670 const SCEV *Step = AddRec->getStepRecurrence(SE);
671 bool IsSigned = MinMax->isSigned();
672 // To minimize number of peeled iterations, we use strict relational
673 // predicates here.
675 if (SE.isKnownPositive(Step))
676 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
677 else if (SE.isKnownNegative(Step))
678 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
679 else
680 return;
681 // Check that AddRec is not wrapping.
682 if (!(IsSigned ? AddRec->hasNoSignedWrap() : AddRec->hasNoUnsignedWrap()))
683 return;
684 unsigned NewPeelCount = DesiredPeelCount;
685 const SCEV *IterVal = AddRec->evaluateAtIteration(
686 SE.getConstant(AddRec->getType(), NewPeelCount), SE);
687 if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, BoundSCEV, Step,
688 Pred)) {
689 if (shouldPeelLastIteration(L, Pred, AddRec, BoundSCEV, SE, TTI))
690 DesiredPeelCountLast = 1;
691 return;
692 }
693 DesiredPeelCount = NewPeelCount;
694 };
695
696 for (BasicBlock *BB : L.blocks()) {
697 for (Instruction &I : *BB) {
698 if (SelectInst *SI = dyn_cast<SelectInst>(&I))
699 ComputePeelCount(SI->getCondition(), 0);
700 if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(&I))
701 ComputePeelCountMinMax(MinMax);
702 }
703
704 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
705 if (!BI || BI->isUnconditional())
706 continue;
707
708 // Ignore loop exit condition.
709 if (L.getLoopLatch() == BB)
710 continue;
711
712 ComputePeelCount(BI->getCondition(), 0);
713 }
714
715 return {DesiredPeelCount, DesiredPeelCountLast};
716}
717
718/// This "heuristic" exactly matches implicit behavior which used to exist
719/// inside getLoopEstimatedTripCount. It was added here to keep an
720/// improvement inside that API from causing peeling to become more aggressive.
721/// This should probably be removed.
723 BasicBlock *Latch = L->getLoopLatch();
724 if (!Latch)
725 return true;
726
727 BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
728 if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
729 return true;
730
731 assert((LatchBR->getSuccessor(0) == L->getHeader() ||
732 LatchBR->getSuccessor(1) == L->getHeader()) &&
733 "At least one edge out of the latch must go to the header");
734
736 L->getUniqueNonLatchExitBlocks(ExitBlocks);
737 return any_of(ExitBlocks, [](const BasicBlock *EB) {
738 return !EB->getTerminatingDeoptimizeCall();
739 });
740}
741
742
743// Return the number of iterations we want to peel off.
744void llvm::computePeelCount(Loop *L, unsigned LoopSize,
746 unsigned TripCount, DominatorTree &DT,
748 AssumptionCache *AC, unsigned Threshold) {
749 assert(LoopSize > 0 && "Zero loop size is not allowed!");
750 // Save the PP.PeelCount value set by the target in
751 // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
752 unsigned TargetPeelCount = PP.PeelCount;
753 PP.PeelCount = 0;
754 PP.PeelLast = false;
755 if (!canPeel(L))
756 return;
757
758 // Only try to peel innermost loops by default.
759 // The constraint can be relaxed by the target in TTI.getPeelingPreferences
760 // or by the flag -unroll-allow-loop-nests-peeling.
761 if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
762 return;
763
764 // If the user provided a peel count, use that.
765 bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
766 if (UserPeelCount) {
767 LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
768 << " iterations.\n");
770 PP.PeelProfiledIterations = true;
771 return;
772 }
773
774 // Skip peeling if it's disabled.
775 if (!PP.AllowPeeling)
776 return;
777
778 // Check that we can peel at least one iteration.
779 if (2 * LoopSize > Threshold)
780 return;
781
782 unsigned AlreadyPeeled = 0;
784 AlreadyPeeled = *Peeled;
785 // Stop if we already peeled off the maximum number of iterations.
786 if (AlreadyPeeled >= UnrollPeelMaxCount)
787 return;
788
789 // Pay respect to limitations implied by loop size and the max peel count.
790 unsigned MaxPeelCount = UnrollPeelMaxCount;
791 MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);
792
793 // Start the max computation with the PP.PeelCount value set by the target
794 // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
795 unsigned DesiredPeelCount = TargetPeelCount;
796
797 // Here we try to get rid of Phis which become invariants or inductions after
798 // 1, 2, ..., N iterations of the loop. For this we compute the number for
799 // iterations after which every Phi is guaranteed to become an invariant or an
800 // induction, and try to peel the maximum number of iterations among these
801 // values, thus turning all those Phis into invariants or inductions.
802 if (MaxPeelCount > DesiredPeelCount) {
803 // Check how many iterations are useful for resolving Phis
804 auto NumPeels = PhiAnalyzer(*L, MaxPeelCount, EnablePeelingForIV)
805 .calculateIterationsToPeel();
806 if (NumPeels)
807 DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels);
808 }
809
810 const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
811 countToEliminateCompares(*L, MaxPeelCount, SE, TTI);
812 DesiredPeelCount = std::max(DesiredPeelCount, CountToEliminateCmps);
813
814 if (DesiredPeelCount == 0)
815 DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT, AC);
816
817 if (DesiredPeelCount > 0) {
818 DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
819 // Consider max peel count limitation.
820 assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
821 if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
822 LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
823 << " iteration(s) to turn"
824 << " some Phis into invariants or inductions.\n");
825 PP.PeelCount = DesiredPeelCount;
826 PP.PeelProfiledIterations = false;
827 PP.PeelLast = false;
828 return;
829 }
830 }
831
832 if (CountToEliminateCmpsLast > 0) {
833 unsigned DesiredPeelCountLast =
834 std::min(CountToEliminateCmpsLast, MaxPeelCount);
835 // Consider max peel count limitation.
836 assert(DesiredPeelCountLast > 0 && "Wrong loop size estimation?");
837 if (DesiredPeelCountLast + AlreadyPeeled <= UnrollPeelMaxCount) {
838 LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
839 << " iteration(s) to turn"
840 << " some Phis into invariants.\n");
841 PP.PeelCount = DesiredPeelCountLast;
842 PP.PeelProfiledIterations = false;
843 PP.PeelLast = true;
844 return;
845 }
846 }
847
848 // Bail if we know the statically calculated trip count.
849 // In this case we rather prefer partial unrolling.
850 if (TripCount)
851 return;
852
853 // Do not apply profile base peeling if it is disabled.
855 return;
856 // If we don't know the trip count, but have reason to believe the average
857 // trip count is low, peeling should be beneficial, since we will usually
858 // hit the peeled section.
859 // We only do this in the presence of profile information, since otherwise
860 // our estimates of the trip count are not reliable enough.
861 if (L->getHeader()->getParent()->hasProfileData()) {
863 return;
864 std::optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L);
865 if (!EstimatedTripCount)
866 return;
867
868 LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is "
869 << *EstimatedTripCount << "\n");
870
871 if (*EstimatedTripCount + AlreadyPeeled <= MaxPeelCount) {
872 unsigned PeelCount = *EstimatedTripCount;
873 LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n");
874 PP.PeelCount = PeelCount;
875 return;
876 }
877 LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
878 LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
879 LLVM_DEBUG(dbgs() << "Loop cost: " << LoopSize << "\n");
880 LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
881 LLVM_DEBUG(dbgs() << "Max peel count by cost: "
882 << (Threshold / LoopSize - 1) << "\n");
883 }
884}
885
887 // Weights for current iteration.
889 // Weights to subtract after each iteration.
891};
892
893/// Update the branch weights of an exiting block of a peeled-off loop
894/// iteration.
895/// Let F is a weight of the edge to continue (fallthrough) into the loop.
896/// Let E is a weight of the edge to an exit.
897/// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
898/// go to exit.
899/// Then, Estimated ExitCount = F / E.
900/// For I-th (counting from 0) peeled off iteration we set the weights for
901/// the peeled exit as (EC - I, 1). It gives us reasonable distribution,
902/// The probability to go to exit 1/(EC-I) increases. At the same time
903/// the estimated exit count in the remainder loop reduces by I.
904/// To avoid dealing with division rounding we can just multiple both part
905/// of weights to E and use weight as (F - I * E, E).
906static void updateBranchWeights(Instruction *Term, WeightInfo &Info) {
907 setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false);
908 for (auto [Idx, SubWeight] : enumerate(Info.SubWeights))
909 if (SubWeight != 0)
910 // Don't set the probability of taking the edge from latch to loop header
911 // to less than 1:1 ratio (meaning Weight should not be lower than
912 // SubWeight), as this could significantly reduce the loop's hotness,
913 // which would be incorrect in the case of underestimating the trip count.
914 Info.Weights[Idx] =
915 Info.Weights[Idx] > SubWeight
916 ? std::max(Info.Weights[Idx] - SubWeight, SubWeight)
917 : SubWeight;
918}
919
920/// Initialize the weights for all exiting blocks.
922 Loop *L) {
923 SmallVector<BasicBlock *> ExitingBlocks;
924 L->getExitingBlocks(ExitingBlocks);
925 for (BasicBlock *ExitingBlock : ExitingBlocks) {
926 Instruction *Term = ExitingBlock->getTerminator();
927 SmallVector<uint32_t> Weights;
928 if (!extractBranchWeights(*Term, Weights))
929 continue;
930
931 // See the comment on updateBranchWeights() for an explanation of what we
932 // do here.
933 uint32_t FallThroughWeights = 0;
934 uint32_t ExitWeights = 0;
935 for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
936 if (L->contains(Succ))
937 FallThroughWeights += Weight;
938 else
939 ExitWeights += Weight;
940 }
941
942 // Don't try to update weights for degenerate case.
943 if (FallThroughWeights == 0)
944 continue;
945
946 SmallVector<uint32_t> SubWeights;
947 for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
948 if (!L->contains(Succ)) {
949 // Exit weights stay the same.
950 SubWeights.push_back(0);
951 continue;
952 }
953
954 // Subtract exit weights on each iteration, distributed across all
955 // fallthrough edges.
956 double W = (double)Weight / (double)FallThroughWeights;
957 SubWeights.push_back((uint32_t)(ExitWeights * W));
958 }
959
960 WeightInfos.insert({Term, {std::move(Weights), std::move(SubWeights)}});
961 }
962}
963
964/// Clones the body of the loop L, putting it between \p InsertTop and \p
965/// InsertBot.
966/// \param IterNumber The serial number of the iteration currently being
967/// peeled off.
968/// \param PeelLast Peel off the last iterations from \p L.
969/// \param ExitEdges The exit edges of the original loop.
970/// \param[out] NewBlocks A list of the blocks in the newly created clone
971/// \param[out] VMap The value map between the loop and the new clone.
972/// \param LoopBlocks A helper for DFS-traversal of the loop.
973/// \param LVMap A value-map that maps instructions from the original loop to
974/// instructions in the last peeled-off iteration.
975static void cloneLoopBlocks(
976 Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
977 BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
978 SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
979 SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
981 LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes,
982 ScalarEvolution &SE) {
983 BasicBlock *Header = L->getHeader();
984 BasicBlock *Latch = L->getLoopLatch();
985 BasicBlock *PreHeader = L->getLoopPreheader();
986
987 Function *F = Header->getParent();
988 LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
989 LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
990 Loop *ParentLoop = L->getParentLoop();
991
992 // For each block in the original loop, create a new copy,
993 // and update the value map with the newly created values.
994 for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
995 BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
996 NewBlocks.push_back(NewBB);
997
998 // If an original block is an immediate child of the loop L, its copy
999 // is a child of a ParentLoop after peeling. If a block is a child of
1000 // a nested loop, it is handled in the cloneLoop() call below.
1001 if (ParentLoop && LI->getLoopFor(*BB) == L)
1002 ParentLoop->addBasicBlockToLoop(NewBB, *LI);
1003
1004 VMap[*BB] = NewBB;
1005
1006 // If dominator tree is available, insert nodes to represent cloned blocks.
1007 if (DT) {
1008 if (Header == *BB)
1009 DT->addNewBlock(NewBB, InsertTop);
1010 else {
1011 DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
1012 // VMap must contain entry for IDom, as the iteration order is RPO.
1013 DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
1014 }
1015 }
1016 }
1017
1018 {
1019 // Identify what other metadata depends on the cloned version. After
1020 // cloning, replace the metadata with the corrected version for both
1021 // memory instructions and noalias intrinsics.
1022 std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
1023 cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
1024 Header->getContext(), Ext);
1025 }
1026
1027 // Recursively create the new Loop objects for nested loops, if any,
1028 // to preserve LoopInfo.
1029 for (Loop *ChildLoop : *L) {
1030 cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
1031 }
1032
1033 // Hook-up the control flow for the newly inserted blocks.
1034 // The new header is hooked up directly to the "top", which is either
1035 // the original loop preheader (for the first iteration) or the previous
1036 // iteration's exiting block (for every other iteration)
1037 InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
1038
1039 // Similarly, for the latch:
1040 // The original exiting edge is still hooked up to the loop exit.
1041 BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
1042 if (PeelLast) {
1043 // This is the last iteration and we definitely will go to the exit. Just
1044 // set both successors to InsertBot and let the branch be simplified later.
1045 assert(IterNumber == 0 && "Only peeling a single iteration implemented.");
1046 auto *LatchTerm = cast<BranchInst>(NewLatch->getTerminator());
1047 LatchTerm->setSuccessor(0, InsertBot);
1048 LatchTerm->setSuccessor(1, InsertBot);
1049 } else {
1050 auto *LatchTerm = cast<Instruction>(NewLatch->getTerminator());
1051 // The backedge now goes to the "bottom", which is either the loop's real
1052 // header (for the last peeled iteration) or the copied header of the next
1053 // iteration (for every other iteration)
1054 for (unsigned idx = 0, e = LatchTerm->getNumSuccessors(); idx < e; ++idx) {
1055 if (LatchTerm->getSuccessor(idx) == Header) {
1056 LatchTerm->setSuccessor(idx, InsertBot);
1057 break;
1058 }
1059 }
1060 }
1061 if (DT)
1062 DT->changeImmediateDominator(InsertBot, NewLatch);
1063
1064 // The new copy of the loop body starts with a bunch of PHI nodes
1065 // that pick an incoming value from either the preheader, or the previous
1066 // loop iteration. Since this copy is no longer part of the loop, we
1067 // resolve this statically:
1068 if (PeelLast) {
1069 // For the last iteration, we introduce new phis for each header phi in
1070 // InsertTop, using the incoming value from the preheader for the original
1071 // preheader (when skipping the main loop) and the incoming value from the
1072 // latch for the latch (when continuing from the main loop).
1073 IRBuilder<> B(InsertTop, InsertTop->getFirstNonPHIIt());
1074 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
1075 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
1076 PHINode *PN = B.CreatePHI(NewPHI->getType(), 2);
1077 NewPHI->eraseFromParent();
1078 if (OrigPreHeader)
1079 PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(PreHeader),
1080 OrigPreHeader);
1081
1082 PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(Latch),
1083 Latch);
1084 VMap[&*I] = PN;
1085 }
1086 } else {
1087 // For the first iteration, we use the value from the preheader directly.
1088 // For any other iteration, we replace the phi with the value generated by
1089 // the immediately preceding clone of the loop body (which represents
1090 // the previous iteration).
1091 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
1092 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
1093 if (IterNumber == 0) {
1094 VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
1095 } else {
1096 Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
1097 Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
1098 if (LatchInst && L->contains(LatchInst))
1099 VMap[&*I] = LVMap[LatchInst];
1100 else
1101 VMap[&*I] = LatchVal;
1102 }
1103 NewPHI->eraseFromParent();
1104 }
1105 }
1106
1107 // Fix up the outgoing values - we need to add a value for the iteration
1108 // we've just created. Note that this must happen *after* the incoming
1109 // values are adjusted, since the value going out of the latch may also be
1110 // a value coming into the header.
1111 for (auto Edge : ExitEdges)
1112 for (PHINode &PHI : Edge.second->phis()) {
1113 Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
1114 Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
1115 if (LatchInst && L->contains(LatchInst))
1116 LatchVal = VMap[LatchVal];
1117 PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
1119 }
1120
1121 // LastValueMap is updated with the values for the current loop
1122 // which are used the next time this function is called.
1123 for (auto KV : VMap)
1124 LVMap[KV.first] = KV.second;
1125}
1126
1129 const TargetTransformInfo &TTI,
1130 std::optional<bool> UserAllowPeeling,
1131 std::optional<bool> UserAllowProfileBasedPeeling,
1132 bool UnrollingSpecficValues) {
1134
1135 // Set the default values.
1136 PP.PeelCount = 0;
1137 PP.AllowPeeling = true;
1138 PP.AllowLoopNestsPeeling = false;
1139 PP.PeelLast = false;
1140 PP.PeelProfiledIterations = true;
1141
1142 // Get the target specifc values.
1143 TTI.getPeelingPreferences(L, SE, PP);
1144
1145 // User specified values using cl::opt.
1146 if (UnrollingSpecficValues) {
1147 if (UnrollPeelCount.getNumOccurrences() > 0)
1153 }
1154
1155 // User specifed values provided by argument.
1156 if (UserAllowPeeling)
1157 PP.AllowPeeling = *UserAllowPeeling;
1158 if (UserAllowProfileBasedPeeling)
1159 PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
1160
1161 return PP;
1162}
1163
1164/// Peel off the first \p PeelCount iterations of loop \p L.
1165///
1166/// Note that this does not peel them off as a single straight-line block.
1167/// Rather, each iteration is peeled off separately, and needs to check the
1168/// exit condition.
1169/// For loops that dynamically execute \p PeelCount iterations or less
1170/// this provides a benefit, since the peeled off iterations, which account
1171/// for the bulk of dynamic execution, can be further simplified by scalar
1172/// optimizations.
1173bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1175 bool PreserveLCSSA, ValueToValueMapTy &LVMap) {
1176 assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
1177 assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
1178 assert((!PeelLast || (canPeelLastIteration(*L, *SE) && PeelCount == 1)) &&
1179 "when peeling the last iteration, the loop must be supported and can "
1180 "only peel a single iteration");
1181
1182 LoopBlocksDFS LoopBlocks(L);
1183 LoopBlocks.perform(LI);
1184
1185 BasicBlock *Header = L->getHeader();
1186 BasicBlock *PreHeader = L->getLoopPreheader();
1187 BasicBlock *Latch = L->getLoopLatch();
1189 L->getExitEdges(ExitEdges);
1190
1191 // Remember dominators of blocks we might reach through exits to change them
1192 // later. Immediate dominator of such block might change, because we add more
1193 // routes which can lead to the exit: we can reach it from the peeled
1194 // iterations too.
1195 DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
1196 for (auto *BB : L->blocks()) {
1197 auto *BBDomNode = DT.getNode(BB);
1198 SmallVector<BasicBlock *, 16> ChildrenToUpdate;
1199 for (auto *ChildDomNode : BBDomNode->children()) {
1200 auto *ChildBB = ChildDomNode->getBlock();
1201 if (!L->contains(ChildBB))
1202 ChildrenToUpdate.push_back(ChildBB);
1203 }
1204 // The new idom of the block will be the nearest common dominator
1205 // of all copies of the previous idom. This is equivalent to the
1206 // nearest common dominator of the previous idom and the first latch,
1207 // which dominates all copies of the previous idom.
1208 BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch);
1209 for (auto *ChildBB : ChildrenToUpdate)
1210 NonLoopBlocksIDom[ChildBB] = NewIDom;
1211 }
1212
1213 Function *F = Header->getParent();
1214
1215 // Set up all the necessary basic blocks.
1216 BasicBlock *InsertTop;
1217 BasicBlock *InsertBot;
1218 BasicBlock *NewPreHeader = nullptr;
1220 if (PeelLast) {
1221 // It is convenient to split the single exit block from the latch the
1222 // into 3 parts - two blocks to anchor the peeled copy of the loop body,
1223 // and a new final exit block.
1224
1225 // Peeling the last iteration transforms.
1226 //
1227 // PreHeader:
1228 // ...
1229 // Header:
1230 // LoopBody
1231 // If (cond) goto Header
1232 // Exit:
1233 //
1234 // into
1235 //
1236 // Header:
1237 // LoopBody
1238 // If (cond) goto Header
1239 // InsertTop:
1240 // LoopBody
1241 // If (!cond) goto InsertBot
1242 // InsertBot:
1243 // Exit:
1244 // ...
1245 BasicBlock *Exit = L->getExitBlock();
1246 for (PHINode &P : Exit->phis())
1247 ExitValues[&P] = P.getIncomingValueForBlock(Latch);
1248
1249 const SCEV *BTC = SE->getBackedgeTakenCount(L);
1250
1251 InsertTop = SplitEdge(Latch, Exit, &DT, LI);
1252 InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
1253
1254 InsertTop->setName(Exit->getName() + ".peel.begin");
1255 InsertBot->setName(Exit->getName() + ".peel.next");
1256 NewPreHeader = nullptr;
1257
1258 // If the original loop may only execute a single iteration we need to
1259 // insert a trip count check and skip the original loop with the last
1260 // iteration peeled off if necessary.
1261 if (!SE->isKnownNonZero(BTC)) {
1262 NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI);
1263 SCEVExpander Expander(*SE, Latch->getDataLayout(), "loop-peel");
1264
1265 BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
1266 Value *BTCValue =
1267 Expander.expandCodeFor(BTC, BTC->getType(), PreHeaderBR);
1268 IRBuilder<> B(PreHeaderBR);
1269 Value *Cond =
1270 B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0));
1271 B.CreateCondBr(Cond, NewPreHeader, InsertTop);
1272 PreHeaderBR->eraseFromParent();
1273
1274 // PreHeader now dominates InsertTop.
1275 DT.changeImmediateDominator(InsertTop, PreHeader);
1276 }
1277 } else {
1278 // It is convenient to split the preheader into 3 parts - two blocks to
1279 // anchor the peeled copy of the loop body, and a new preheader for the
1280 // "real" loop.
1281
1282 // Peeling the first iteration transforms.
1283 //
1284 // PreHeader:
1285 // ...
1286 // Header:
1287 // LoopBody
1288 // If (cond) goto Header
1289 // Exit:
1290 //
1291 // into
1292 //
1293 // InsertTop:
1294 // LoopBody
1295 // If (!cond) goto Exit
1296 // InsertBot:
1297 // NewPreHeader:
1298 // ...
1299 // Header:
1300 // LoopBody
1301 // If (cond) goto Header
1302 // Exit:
1303 //
1304 // Each following iteration will split the current bottom anchor in two,
1305 // and put the new copy of the loop body between these two blocks. That
1306 // is, after peeling another iteration from the example above, we'll
1307 // split InsertBot, and get:
1308 //
1309 // InsertTop:
1310 // LoopBody
1311 // If (!cond) goto Exit
1312 // InsertBot:
1313 // LoopBody
1314 // If (!cond) goto Exit
1315 // InsertBot.next:
1316 // NewPreHeader:
1317 // ...
1318 // Header:
1319 // LoopBody
1320 // If (cond) goto Header
1321 // Exit:
1322 //
1323 InsertTop = SplitEdge(PreHeader, Header, &DT, LI);
1324 InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
1325 NewPreHeader = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
1326
1327 InsertTop->setName(Header->getName() + ".peel.begin");
1328 InsertBot->setName(Header->getName() + ".peel.next");
1329 NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
1330 }
1331
1332 Instruction *LatchTerm =
1333 cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator());
1334
1335 // If we have branch weight information, we'll want to update it for the
1336 // newly created branches.
1338 initBranchWeights(Weights, L);
1339
1340 // Identify what noalias metadata is inside the loop: if it is inside the
1341 // loop, the associated metadata must be cloned for each iteration.
1342 SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
1343 identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
1344
1345 // For each peeled-off iteration, make a copy of the loop.
1346 ValueToValueMapTy VMap;
1347 for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
1349
1350 cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot,
1351 NewPreHeader ? PreHeader : nullptr, ExitEdges, NewBlocks,
1352 LoopBlocks, VMap, LVMap, &DT, LI,
1353 LoopLocalNoAliasDeclScopes, *SE);
1354
1355 // Remap to use values from the current iteration instead of the
1356 // previous one.
1357 remapInstructionsInBlocks(NewBlocks, VMap);
1358
1359 if (Iter == 0) {
1360 if (PeelLast) {
1361 // Adjust the exit condition so the loop exits one iteration early.
1362 // For now we simply subtract one form the second operand of the
1363 // exit condition. This relies on the peel count computation to
1364 // check that this is actually legal. In particular, it ensures that
1365 // the first operand of the compare is an AddRec with step 1 and we
1366 // execute more than one iteration.
1367 auto *Cmp =
1368 cast<ICmpInst>(L->getLoopLatch()->getTerminator()->getOperand(0));
1369 IRBuilder B(Cmp);
1370 Cmp->setOperand(
1371 1, B.CreateSub(Cmp->getOperand(1),
1372 ConstantInt::get(Cmp->getOperand(1)->getType(), 1)));
1373 } else {
1374 // Update IDoms of the blocks reachable through exits.
1375 for (auto BBIDom : NonLoopBlocksIDom)
1376 DT.changeImmediateDominator(BBIDom.first,
1377 cast<BasicBlock>(LVMap[BBIDom.second]));
1378 }
1379 }
1380
1381#ifdef EXPENSIVE_CHECKS
1382 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1383#endif
1384
1385 for (auto &[Term, Info] : Weights) {
1386 auto *TermCopy = cast<Instruction>(VMap[Term]);
1387 updateBranchWeights(TermCopy, Info);
1388 }
1389
1390 // Remove Loop metadata from the latch branch instruction
1391 // because it is not the Loop's latch branch anymore.
1392 auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]);
1393 LatchTermCopy->setMetadata(LLVMContext::MD_loop, nullptr);
1394
1395 InsertTop = InsertBot;
1396 InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
1397 InsertBot->setName(Header->getName() + ".peel.next");
1398
1399 F->splice(InsertTop->getIterator(), F, NewBlocks[0]->getIterator(),
1400 F->end());
1401 }
1402
1403 if (PeelLast) {
1404 // Now adjust users of the original exit values by replacing them with the
1405 // exit value from the peeled iteration and remove them.
1406 for (const auto &[P, E] : ExitValues) {
1407 Instruction *ExitInst = dyn_cast<Instruction>(E);
1408 if (ExitInst && L->contains(ExitInst))
1409 P->replaceAllUsesWith(&*VMap[ExitInst]);
1410 else
1411 P->replaceAllUsesWith(E);
1412 P->eraseFromParent();
1413 }
1414 formLCSSA(*L, DT, LI, SE);
1415 } else {
1416 // Now adjust the phi nodes in the loop header to get their initial values
1417 // from the last peeled-off iteration instead of the preheader.
1418 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
1419 PHINode *PHI = cast<PHINode>(I);
1420 Value *NewVal = PHI->getIncomingValueForBlock(Latch);
1421 Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
1422 if (LatchInst && L->contains(LatchInst))
1423 NewVal = LVMap[LatchInst];
1424
1425 PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
1426 }
1427 }
1428
1429 for (const auto &[Term, Info] : Weights) {
1430 setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false);
1431 }
1432
1433 // Update Metadata for count of peeled off iterations.
1434 unsigned AlreadyPeeled = 0;
1436 AlreadyPeeled = *Peeled;
1437 addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
1438
1439 if (Loop *ParentLoop = L->getParentLoop())
1440 L = ParentLoop;
1441
1442 // We modified the loop, update SE.
1443 SE->forgetTopmostLoop(L);
1445
1446#ifdef EXPENSIVE_CHECKS
1447 // Finally DomtTree must be correct.
1448 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1449#endif
1450
1451 // FIXME: Incrementally update loop-simplify
1452 simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA);
1453
1454 NumPeeled++;
1455 NumPeeledEnd += PeelLast;
1456
1457 return true;
1458}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
static void updateBranchWeights(Instruction *Term, WeightInfo &Info)
Update the branch weights of an exiting block of a peeled-off loop iteration.
Definition: LoopPeel.cpp:906
static cl::opt< bool > DisableAdvancedPeeling("disable-advanced-peeling", cl::init(false), cl::Hidden, cl::desc("Disable advance peeling. Issues for convergent targets (D134803)."))
static cl::opt< unsigned > UnrollPeelMaxCount("unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling."))
static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred, const SCEVAddRecExpr *LeftAR, const SCEV *RightSCEV, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Returns true if the last iteration can be peeled off and the condition (Pred LeftAR,...
Definition: LoopPeel.cpp:503
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
static cl::opt< unsigned > UnrollForcePeelCount("unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information."))
static bool violatesLegacyMultiExitLoopCheck(Loop *L)
This "heuristic" exactly matches implicit behavior which used to exist inside getLoopEstimatedTripCou...
Definition: LoopPeel.cpp:722
static const char * PeeledCountMetaData
Definition: LoopPeel.cpp:88
static std::pair< unsigned, unsigned > countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Definition: LoopPeel.cpp:541
static cl::opt< bool > UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, cl::desc("Allows loop nests to be peeled."))
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L, DominatorTree &DT, AssumptionCache *AC)
Definition: LoopPeel.cpp:418
static cl::opt< bool > EnablePeelingForIV("enable-peeling-for-iv", cl::init(false), cl::Hidden, cl::desc("Enable peeling to convert Phi nodes into IVs"))
static void cloneLoopBlocks(Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *OrigPreHeader, SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * > > &ExitEdges, SmallVectorImpl< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI, ArrayRef< MDNode * > LoopLocalNoAliasDeclScopes, ScalarEvolution &SE)
Clones the body of the loop L, putting it between InsertTop and InsertBot.
Definition: LoopPeel.cpp:975
static void initBranchWeights(DenseMap< Instruction *, WeightInfo > &WeightInfos, Loop *L)
Initialize the weights for all exiting blocks.
Definition: LoopPeel.cpp:921
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define P(N)
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:337
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:287
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:252
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
BasicBlock * getSuccessor(unsigned i) const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
@ ICMP_EQ
equal
Definition: InstrTypes.h:699
@ ICMP_NE
not equal
Definition: InstrTypes.h:700
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:23
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
DomTreeNodeBase * getIDom() const
NodeT * getBlock() const
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
Definition: Dominators.cpp:357
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:135
bool isEquality() const
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1276
RPOIterator endRPO() const
Definition: LoopIterator.h:140
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
This class represents min/max intrinsics.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This node represents a polynomial recurrence on the trip count of the specified loop.
LLVM_ABI const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents a constant integer value.
This class uses information about analyze scalars to rewrite expressions in canonical form.
bool isHighCostExpansion(ArrayRef< const SCEV * > Exprs, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
static LLVM_ABI LoopGuards collect(const Loop *L, ScalarEvolution &SE)
Collect rewrite map for loop guards for loop L, together with flags indicating if NUW and NSW can be ...
The main scalar evolution driver.
const SCEV * getConstantMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEVConstant that is greater than or equal to (i.e.
LLVM_ABI bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...
LLVM_ABI std::optional< MonotonicPredicateType > getMonotonicPredicateType(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred)
If, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or decreasing,...
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI std::optional< bool > evaluatePredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Check whether the condition described by Pred, LHS, and RHS is true or false.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
This class represents the LLVM 'select' instruction.
void insert_range(Range &&R)
Definition: SmallPtrSet.h:490
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:476
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:390
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
int getNumOccurrences() const
Definition: CommandLine.h:400
self_iterator getIterator()
Definition: ilist_node.h:134
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition: PatternMatch.h:189
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
cst_pred_ty< is_one > m_scev_One()
Match an integer 1.
specificloop_ty m_SpecificLoop(const Loop *L)
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
class_match< const SCEV > m_SCEV()
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:860
LLVM_ABI std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:841
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2491
auto successors(const MachineBasicBlock *BB)
bool canPeel(const Loop *L)
Definition: LoopPeel.cpp:91
bool canPeelLastIteration(const Loop &L, ScalarEvolution &SE)
Returns true if the last iteration of L can be peeled off.
Definition: LoopPeel.cpp:472
LLVM_ABI void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
Definition: LoopUtils.cpp:215
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:1128
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache *AC=nullptr, unsigned Threshold=UINT_MAX)
Definition: LoopPeel.cpp:744
LLVM_ABI cl::opt< unsigned > SCEVCheapExpansionBudget
LLVM_ABI std::optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
Definition: LoopInfo.cpp:1125
LLVM_ABI void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition: Loads.cpp:252
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
LLVM_ABI void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
LLVM_ABI bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition: LCSSA.cpp:427
bool peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &VMap)
VMap is the value-map that maps instructions from the original loop to instructions in the last peele...
Definition: LoopPeel.cpp:1173
LLVM_ABI Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
Definition: LoopUtils.cpp:1857
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:856
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
SmallVector< uint32_t > Weights
Definition: LoopPeel.cpp:888
const SmallVector< uint32_t > SubWeights
Definition: LoopPeel.cpp:890
bool AllowPeeling
Allow peeling off loop iterations.
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
bool PeelLast
Peel off the last PeelCount loop iterations.
bool PeelProfiledIterations
Allow peeling basing on profile.
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...