LLVM 21.0.0git
LoopUnrollPass.cpp
Go to the documentation of this file.
1//===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements a simple loop unroller. It works best when loops have
10// been canonicalized by the -indvars pass, allowing it to determine the trip
11// counts of loops easily.
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/DenseSet.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SetVector.h"
22#include "llvm/ADT/StringRef.h"
35#include "llvm/IR/BasicBlock.h"
36#include "llvm/IR/CFG.h"
37#include "llvm/IR/Constant.h"
38#include "llvm/IR/Constants.h"
40#include "llvm/IR/Dominators.h"
41#include "llvm/IR/Function.h"
42#include "llvm/IR/Instruction.h"
45#include "llvm/IR/Metadata.h"
46#include "llvm/IR/PassManager.h"
48#include "llvm/Pass.h"
51#include "llvm/Support/Debug.h"
63#include <algorithm>
64#include <cassert>
65#include <cstdint>
66#include <limits>
67#include <optional>
68#include <string>
69#include <tuple>
70#include <utility>
71
72using namespace llvm;
73
74#define DEBUG_TYPE "loop-unroll"
75
77 "forget-scev-loop-unroll", cl::init(false), cl::Hidden,
78 cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
79 " the current top-most loop. This is sometimes preferred to reduce"
80 " compile time."));
81
83 UnrollThreshold("unroll-threshold", cl::Hidden,
84 cl::desc("The cost threshold for loop unrolling"));
85
88 "unroll-optsize-threshold", cl::init(0), cl::Hidden,
89 cl::desc("The cost threshold for loop unrolling when optimizing for "
90 "size"));
91
93 "unroll-partial-threshold", cl::Hidden,
94 cl::desc("The cost threshold for partial loop unrolling"));
95
97 "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
98 cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
99 "to the threshold when aggressively unrolling a loop due to the "
100 "dynamic cost savings. If completely unrolling a loop will reduce "
101 "the total runtime from X to Y, we boost the loop unroll "
102 "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
103 "X/Y). This limit avoids excessive code bloat."));
104
106 "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
107 cl::desc("Don't allow loop unrolling to simulate more than this number of "
108 "iterations when checking full unroll profitability"));
109
111 "unroll-count", cl::Hidden,
112 cl::desc("Use this unroll count for all loops including those with "
113 "unroll_count pragma values, for testing purposes"));
114
116 "unroll-max-count", cl::Hidden,
117 cl::desc("Set the max unroll count for partial and runtime unrolling, for"
118 "testing purposes"));
119
121 "unroll-full-max-count", cl::Hidden,
122 cl::desc(
123 "Set the max unroll count for full unrolling, for testing purposes"));
124
125static cl::opt<bool>
126 UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
127 cl::desc("Allows loops to be partially unrolled until "
128 "-unroll-threshold loop size is reached."));
129
131 "unroll-allow-remainder", cl::Hidden,
132 cl::desc("Allow generation of a loop remainder (extra iterations) "
133 "when unrolling a loop."));
134
135static cl::opt<bool>
136 UnrollRuntime("unroll-runtime", cl::Hidden,
137 cl::desc("Unroll loops with run-time trip counts"));
138
140 "unroll-max-upperbound", cl::init(8), cl::Hidden,
141 cl::desc(
142 "The max of trip count upper bound that is considered in unrolling"));
143
145 "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
146 cl::desc("Unrolled size limit for loops with an unroll(full) or "
147 "unroll_count pragma."));
148
150 "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
151 cl::desc("If the runtime tripcount for the loop is lower than the "
152 "threshold, the loop is considered as flat and will be less "
153 "aggressively unrolled."));
154
156 "unroll-remainder", cl::Hidden,
157 cl::desc("Allow the loop remainder to be unrolled."));
158
159// This option isn't ever intended to be enabled, it serves to allow
160// experiments to check the assumptions about when this kind of revisit is
161// necessary.
163 "unroll-revisit-child-loops", cl::Hidden,
164 cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
165 "This shouldn't typically be needed as child loops (or their "
166 "clones) were already visited."));
167
169 "unroll-threshold-aggressive", cl::init(300), cl::Hidden,
170 cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) "
171 "optimizations"));
173 UnrollThresholdDefault("unroll-threshold-default", cl::init(150),
175 cl::desc("Default threshold (max size of unrolled "
176 "loop), used in all but O3 optimizations"));
177
179 "pragma-unroll-full-max-iterations", cl::init(1'000'000), cl::Hidden,
180 cl::desc("Maximum allowed iterations to unroll under pragma unroll full."));
181
182/// A magic value for use with the Threshold parameter to indicate
183/// that the loop unroll should be performed regardless of how much
184/// code expansion would result.
185static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
186
187/// Gather the various unrolling parameters based on the defaults, compiler
188/// flags, TTI overrides and user specified parameters.
192 OptimizationRemarkEmitter &ORE, int OptLevel,
193 std::optional<unsigned> UserThreshold, std::optional<unsigned> UserCount,
194 std::optional<bool> UserAllowPartial, std::optional<bool> UserRuntime,
195 std::optional<bool> UserUpperBound,
196 std::optional<unsigned> UserFullUnrollMaxCount) {
198
199 // Set up the defaults
200 UP.Threshold =
204 UP.PartialThreshold = 150;
206 UP.Count = 0;
208 UP.MaxCount = std::numeric_limits<unsigned>::max();
210 UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max();
211 UP.BEInsns = 2;
212 UP.Partial = false;
213 UP.Runtime = false;
214 UP.AllowRemainder = true;
215 UP.UnrollRemainder = false;
216 UP.AllowExpensiveTripCount = false;
217 UP.Force = false;
218 UP.UpperBound = false;
219 UP.UnrollAndJam = false;
223 UP.RuntimeUnrollMultiExit = false;
224
225 // Override with any target specific settings
226 TTI.getUnrollingPreferences(L, SE, UP, &ORE);
227
228 // Apply size attributes
229 bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
230 // Let unroll hints / pragmas take precedence over PGSO.
232 llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
233 PGSOQueryType::IRPass));
234 if (OptForSize) {
238 }
239
240 // Apply any user values specified by cl::opt
241 if (UnrollThreshold.getNumOccurrences() > 0)
243 if (UnrollPartialThreshold.getNumOccurrences() > 0)
245 if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
247 if (UnrollMaxCount.getNumOccurrences() > 0)
249 if (UnrollMaxUpperBound.getNumOccurrences() > 0)
251 if (UnrollFullMaxCount.getNumOccurrences() > 0)
260 UP.UpperBound = false;
263 if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0)
265
266 // Apply user values provided by argument
267 if (UserThreshold) {
268 UP.Threshold = *UserThreshold;
269 UP.PartialThreshold = *UserThreshold;
270 }
271 if (UserCount)
272 UP.Count = *UserCount;
273 if (UserAllowPartial)
274 UP.Partial = *UserAllowPartial;
275 if (UserRuntime)
276 UP.Runtime = *UserRuntime;
277 if (UserUpperBound)
278 UP.UpperBound = *UserUpperBound;
279 if (UserFullUnrollMaxCount)
280 UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
281
282 return UP;
283}
284
285namespace {
286
287/// A struct to densely store the state of an instruction after unrolling at
288/// each iteration.
289///
290/// This is designed to work like a tuple of <Instruction *, int> for the
291/// purposes of hashing and lookup, but to be able to associate two boolean
292/// states with each key.
293struct UnrolledInstState {
294 Instruction *I;
295 int Iteration : 30;
296 unsigned IsFree : 1;
297 unsigned IsCounted : 1;
298};
299
300/// Hashing and equality testing for a set of the instruction states.
301struct UnrolledInstStateKeyInfo {
302 using PtrInfo = DenseMapInfo<Instruction *>;
304
305 static inline UnrolledInstState getEmptyKey() {
306 return {PtrInfo::getEmptyKey(), 0, 0, 0};
307 }
308
309 static inline UnrolledInstState getTombstoneKey() {
310 return {PtrInfo::getTombstoneKey(), 0, 0, 0};
311 }
312
313 static inline unsigned getHashValue(const UnrolledInstState &S) {
314 return PairInfo::getHashValue({S.I, S.Iteration});
315 }
316
317 static inline bool isEqual(const UnrolledInstState &LHS,
318 const UnrolledInstState &RHS) {
319 return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
320 }
321};
322
323struct EstimatedUnrollCost {
324 /// The estimated cost after unrolling.
325 unsigned UnrolledCost;
326
327 /// The estimated dynamic cost of executing the instructions in the
328 /// rolled form.
329 unsigned RolledDynamicCost;
330};
331
332struct PragmaInfo {
333 PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)
334 : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
335 PragmaEnableUnroll(PEU) {}
336 const bool UserUnrollCount;
337 const bool PragmaFullUnroll;
338 const unsigned PragmaCount;
339 const bool PragmaEnableUnroll;
340};
341
342} // end anonymous namespace
343
344/// Figure out if the loop is worth full unrolling.
345///
346/// Complete loop unrolling can make some loads constant, and we need to know
347/// if that would expose any further optimization opportunities. This routine
348/// estimates this optimization. It computes cost of unrolled loop
349/// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
350/// dynamic cost we mean that we won't count costs of blocks that are known not
351/// to be executed (i.e. if we have a branch in the loop and we know that at the
352/// given iteration its condition would be resolved to true, we won't add up the
353/// cost of the 'false'-block).
354/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
355/// the analysis failed (no benefits expected from the unrolling, or the loop is
356/// too big to analyze), the returned value is std::nullopt.
357static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
358 const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
359 const SmallPtrSetImpl<const Value *> &EphValues,
360 const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize,
361 unsigned MaxIterationsCountToAnalyze) {
362 // We want to be able to scale offsets by the trip count and add more offsets
363 // to them without checking for overflows, and we already don't want to
364 // analyze *massive* trip counts, so we force the max to be reasonably small.
365 assert(MaxIterationsCountToAnalyze <
366 (unsigned)(std::numeric_limits<int>::max() / 2) &&
367 "The unroll iterations max is too large!");
368
369 // Only analyze inner loops. We can't properly estimate cost of nested loops
370 // and we won't visit inner loops again anyway.
371 if (!L->isInnermost())
372 return std::nullopt;
373
374 // Don't simulate loops with a big or unknown tripcount
375 if (!TripCount || TripCount > MaxIterationsCountToAnalyze)
376 return std::nullopt;
377
380 DenseMap<Value *, Value *> SimplifiedValues;
381 SmallVector<std::pair<Value *, Value *>, 4> SimplifiedInputValues;
382
383 // The estimated cost of the unrolled form of the loop. We try to estimate
384 // this by simplifying as much as we can while computing the estimate.
385 InstructionCost UnrolledCost = 0;
386
387 // We also track the estimated dynamic (that is, actually executed) cost in
388 // the rolled form. This helps identify cases when the savings from unrolling
389 // aren't just exposing dead control flows, but actual reduced dynamic
390 // instructions due to the simplifications which we expect to occur after
391 // unrolling.
392 InstructionCost RolledDynamicCost = 0;
393
394 // We track the simplification of each instruction in each iteration. We use
395 // this to recursively merge costs into the unrolled cost on-demand so that
396 // we don't count the cost of any dead code. This is essentially a map from
397 // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
399
400 // A small worklist used to accumulate cost of instructions from each
401 // observable and reached root in the loop.
403
404 // PHI-used worklist used between iterations while accumulating cost.
406
407 // Helper function to accumulate cost for instructions in the loop.
408 auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
409 assert(Iteration >= 0 && "Cannot have a negative iteration!");
410 assert(CostWorklist.empty() && "Must start with an empty cost list");
411 assert(PHIUsedList.empty() && "Must start with an empty phi used list");
412 CostWorklist.push_back(&RootI);
414 RootI.getFunction()->hasMinSize() ?
417 for (;; --Iteration) {
418 do {
419 Instruction *I = CostWorklist.pop_back_val();
420
421 // InstCostMap only uses I and Iteration as a key, the other two values
422 // don't matter here.
423 auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
424 if (CostIter == InstCostMap.end())
425 // If an input to a PHI node comes from a dead path through the loop
426 // we may have no cost data for it here. What that actually means is
427 // that it is free.
428 continue;
429 auto &Cost = *CostIter;
430 if (Cost.IsCounted)
431 // Already counted this instruction.
432 continue;
433
434 // Mark that we are counting the cost of this instruction now.
435 Cost.IsCounted = true;
436
437 // If this is a PHI node in the loop header, just add it to the PHI set.
438 if (auto *PhiI = dyn_cast<PHINode>(I))
439 if (PhiI->getParent() == L->getHeader()) {
440 assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
441 "inherently simplify during unrolling.");
442 if (Iteration == 0)
443 continue;
444
445 // Push the incoming value from the backedge into the PHI used list
446 // if it is an in-loop instruction. We'll use this to populate the
447 // cost worklist for the next iteration (as we count backwards).
448 if (auto *OpI = dyn_cast<Instruction>(
449 PhiI->getIncomingValueForBlock(L->getLoopLatch())))
450 if (L->contains(OpI))
451 PHIUsedList.push_back(OpI);
452 continue;
453 }
454
455 // First accumulate the cost of this instruction.
456 if (!Cost.IsFree) {
457 // Consider simplified operands in instruction cost.
459 transform(I->operands(), std::back_inserter(Operands),
460 [&](Value *Op) {
461 if (auto Res = SimplifiedValues.lookup(Op))
462 return Res;
463 return Op;
464 });
465 UnrolledCost += TTI.getInstructionCost(I, Operands, CostKind);
466 LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
467 << Iteration << "): ");
468 LLVM_DEBUG(I->dump());
469 }
470
471 // We must count the cost of every operand which is not free,
472 // recursively. If we reach a loop PHI node, simply add it to the set
473 // to be considered on the next iteration (backwards!).
474 for (Value *Op : I->operands()) {
475 // Check whether this operand is free due to being a constant or
476 // outside the loop.
477 auto *OpI = dyn_cast<Instruction>(Op);
478 if (!OpI || !L->contains(OpI))
479 continue;
480
481 // Otherwise accumulate its cost.
482 CostWorklist.push_back(OpI);
483 }
484 } while (!CostWorklist.empty());
485
486 if (PHIUsedList.empty())
487 // We've exhausted the search.
488 break;
489
490 assert(Iteration > 0 &&
491 "Cannot track PHI-used values past the first iteration!");
492 CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
493 PHIUsedList.clear();
494 }
495 };
496
497 // Ensure that we don't violate the loop structure invariants relied on by
498 // this analysis.
499 assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
500 assert(L->isLCSSAForm(DT) &&
501 "Must have loops in LCSSA form to track live-out values.");
502
503 LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
504
506 L->getHeader()->getParent()->hasMinSize() ?
508 // Simulate execution of each iteration of the loop counting instructions,
509 // which would be simplified.
510 // Since the same load will take different values on different iterations,
511 // we literally have to go through all loop's iterations.
512 for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
513 LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
514
515 // Prepare for the iteration by collecting any simplified entry or backedge
516 // inputs.
517 for (Instruction &I : *L->getHeader()) {
518 auto *PHI = dyn_cast<PHINode>(&I);
519 if (!PHI)
520 break;
521
522 // The loop header PHI nodes must have exactly two input: one from the
523 // loop preheader and one from the loop latch.
524 assert(
525 PHI->getNumIncomingValues() == 2 &&
526 "Must have an incoming value only for the preheader and the latch.");
527
528 Value *V = PHI->getIncomingValueForBlock(
529 Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
530 if (Iteration != 0 && SimplifiedValues.count(V))
531 V = SimplifiedValues.lookup(V);
532 SimplifiedInputValues.push_back({PHI, V});
533 }
534
535 // Now clear and re-populate the map for the next iteration.
536 SimplifiedValues.clear();
537 while (!SimplifiedInputValues.empty())
538 SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
539
540 UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
541
542 BBWorklist.clear();
543 BBWorklist.insert(L->getHeader());
544 // Note that we *must not* cache the size, this loop grows the worklist.
545 for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
546 BasicBlock *BB = BBWorklist[Idx];
547
548 // Visit all instructions in the given basic block and try to simplify
549 // it. We don't change the actual IR, just count optimization
550 // opportunities.
551 for (Instruction &I : *BB) {
552 // These won't get into the final code - don't even try calculating the
553 // cost for them.
554 if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
555 continue;
556
557 // Track this instruction's expected baseline cost when executing the
558 // rolled loop form.
559 RolledDynamicCost += TTI.getInstructionCost(&I, CostKind);
560
561 // Visit the instruction to analyze its loop cost after unrolling,
562 // and if the visitor returns true, mark the instruction as free after
563 // unrolling and continue.
564 bool IsFree = Analyzer.visit(I);
565 bool Inserted = InstCostMap.insert({&I, (int)Iteration,
566 (unsigned)IsFree,
567 /*IsCounted*/ false}).second;
568 (void)Inserted;
569 assert(Inserted && "Cannot have a state for an unvisited instruction!");
570
571 if (IsFree)
572 continue;
573
574 // Can't properly model a cost of a call.
575 // FIXME: With a proper cost model we should be able to do it.
576 if (auto *CI = dyn_cast<CallInst>(&I)) {
577 const Function *Callee = CI->getCalledFunction();
578 if (!Callee || TTI.isLoweredToCall(Callee)) {
579 LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
580 return std::nullopt;
581 }
582 }
583
584 // If the instruction might have a side-effect recursively account for
585 // the cost of it and all the instructions leading up to it.
586 if (I.mayHaveSideEffects())
587 AddCostRecursively(I, Iteration);
588
589 // If unrolled body turns out to be too big, bail out.
590 if (UnrolledCost > MaxUnrolledLoopSize) {
591 LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
592 << " UnrolledCost: " << UnrolledCost
593 << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
594 << "\n");
595 return std::nullopt;
596 }
597 }
598
599 Instruction *TI = BB->getTerminator();
600
601 auto getSimplifiedConstant = [&](Value *V) -> Constant * {
602 if (SimplifiedValues.count(V))
603 V = SimplifiedValues.lookup(V);
604 return dyn_cast<Constant>(V);
605 };
606
607 // Add in the live successors by first checking whether we have terminator
608 // that may be simplified based on the values simplified by this call.
609 BasicBlock *KnownSucc = nullptr;
610 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
611 if (BI->isConditional()) {
612 if (auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) {
613 // Just take the first successor if condition is undef
614 if (isa<UndefValue>(SimpleCond))
615 KnownSucc = BI->getSuccessor(0);
616 else if (ConstantInt *SimpleCondVal =
617 dyn_cast<ConstantInt>(SimpleCond))
618 KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
619 }
620 }
621 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
622 if (auto *SimpleCond = getSimplifiedConstant(SI->getCondition())) {
623 // Just take the first successor if condition is undef
624 if (isa<UndefValue>(SimpleCond))
625 KnownSucc = SI->getSuccessor(0);
626 else if (ConstantInt *SimpleCondVal =
627 dyn_cast<ConstantInt>(SimpleCond))
628 KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
629 }
630 }
631 if (KnownSucc) {
632 if (L->contains(KnownSucc))
633 BBWorklist.insert(KnownSucc);
634 else
635 ExitWorklist.insert({BB, KnownSucc});
636 continue;
637 }
638
639 // Add BB's successors to the worklist.
640 for (BasicBlock *Succ : successors(BB))
641 if (L->contains(Succ))
642 BBWorklist.insert(Succ);
643 else
644 ExitWorklist.insert({BB, Succ});
645 AddCostRecursively(*TI, Iteration);
646 }
647
648 // If we found no optimization opportunities on the first iteration, we
649 // won't find them on later ones too.
650 if (UnrolledCost == RolledDynamicCost) {
651 LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
652 << " UnrolledCost: " << UnrolledCost << "\n");
653 return std::nullopt;
654 }
655 }
656
657 while (!ExitWorklist.empty()) {
658 BasicBlock *ExitingBB, *ExitBB;
659 std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
660
661 for (Instruction &I : *ExitBB) {
662 auto *PN = dyn_cast<PHINode>(&I);
663 if (!PN)
664 break;
665
666 Value *Op = PN->getIncomingValueForBlock(ExitingBB);
667 if (auto *OpI = dyn_cast<Instruction>(Op))
668 if (L->contains(OpI))
669 AddCostRecursively(*OpI, TripCount - 1);
670 }
671 }
672
673 assert(UnrolledCost.isValid() && RolledDynamicCost.isValid() &&
674 "All instructions must have a valid cost, whether the "
675 "loop is rolled or unrolled.");
676
677 LLVM_DEBUG(dbgs() << "Analysis finished:\n"
678 << "UnrolledCost: " << UnrolledCost << ", "
679 << "RolledDynamicCost: " << RolledDynamicCost << "\n");
680 return {{unsigned(*UnrolledCost.getValue()),
681 unsigned(*RolledDynamicCost.getValue())}};
682}
683
685 const Loop *L, const TargetTransformInfo &TTI,
686 const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
688 for (BasicBlock *BB : L->blocks())
689 Metrics.analyzeBasicBlock(BB, TTI, EphValues, /* PrepareForLTO= */ false,
690 L);
691 NumInlineCandidates = Metrics.NumInlineCandidates;
692 NotDuplicatable = Metrics.notDuplicatable;
693 Convergence = Metrics.Convergence;
694 LoopSize = Metrics.NumInsts;
696 Metrics.Convergence != ConvergenceKind::Uncontrolled &&
698
699 // Don't allow an estimate of size zero. This would allows unrolling of loops
700 // with huge iteration counts, which is a compile time problem even if it's
701 // not a problem for code quality. Also, the code using this size may assume
702 // that each loop has at least three instructions (likely a conditional
703 // branch, a comparison feeding that branch, and some kind of loop increment
704 // feeding that comparison instruction).
705 if (LoopSize.isValid() && LoopSize < BEInsns + 1)
706 // This is an open coded max() on InstructionCost
707 LoopSize = BEInsns + 1;
708}
709
711 switch (Convergence) {
713 LLVM_DEBUG(dbgs() << " Convergence prevents unrolling.\n");
714 return false;
715 default:
716 break;
717 }
718 if (!LoopSize.isValid()) {
719 LLVM_DEBUG(dbgs() << " Invalid loop size prevents unrolling.\n");
720 return false;
721 }
722 if (NotDuplicatable) {
723 LLVM_DEBUG(dbgs() << " Non-duplicatable blocks prevent unrolling.\n");
724 return false;
725 }
726 return true;
727}
728
731 unsigned CountOverwrite) const {
732 unsigned LS = *LoopSize.getValue();
733 assert(LS >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
734 if (CountOverwrite)
735 return static_cast<uint64_t>(LS - UP.BEInsns) * CountOverwrite + UP.BEInsns;
736 else
737 return static_cast<uint64_t>(LS - UP.BEInsns) * UP.Count + UP.BEInsns;
738}
739
740// Returns the loop hint metadata node with the given name (for example,
741// "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
742// returned.
744 if (MDNode *LoopID = L->getLoopID())
745 return GetUnrollMetadata(LoopID, Name);
746 return nullptr;
747}
748
749// Returns true if the loop has an unroll(full) pragma.
750static bool hasUnrollFullPragma(const Loop *L) {
751 return getUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
752}
753
754// Returns true if the loop has an unroll(enable) pragma. This metadata is used
755// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
756static bool hasUnrollEnablePragma(const Loop *L) {
757 return getUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
758}
759
760// Returns true if the loop has an runtime unroll(disable) pragma.
761static bool hasRuntimeUnrollDisablePragma(const Loop *L) {
762 return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
763}
764
765// If loop has an unroll_count pragma return the (necessarily
766// positive) value from the pragma. Otherwise return 0.
767static unsigned unrollCountPragmaValue(const Loop *L) {
768 MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
769 if (MD) {
770 assert(MD->getNumOperands() == 2 &&
771 "Unroll count hint metadata should have two operands.");
772 unsigned Count =
773 mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
774 assert(Count >= 1 && "Unroll count must be positive.");
775 return Count;
776 }
777 return 0;
778}
779
780// Computes the boosting factor for complete unrolling.
781// If fully unrolling the loop would save a lot of RolledDynamicCost, it would
782// be beneficial to fully unroll the loop even if unrolledcost is large. We
783// use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
784// the unroll threshold.
785static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
786 unsigned MaxPercentThresholdBoost) {
787 if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
788 return 100;
789 else if (Cost.UnrolledCost != 0)
790 // The boosting factor is RolledDynamicCost / UnrolledCost
791 return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
792 MaxPercentThresholdBoost);
793 else
794 return MaxPercentThresholdBoost;
795}
796
797static std::optional<unsigned>
798shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
799 const unsigned TripMultiple, const unsigned TripCount,
800 unsigned MaxTripCount, const UnrollCostEstimator UCE,
802
803 // Using unroll pragma
804 // 1st priority is unroll count set by "unroll-count" option.
805
806 if (PInfo.UserUnrollCount) {
807 if (UP.AllowRemainder &&
808 UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold)
809 return (unsigned)UnrollCount;
810 }
811
812 // 2nd priority is unroll count set by pragma.
813 if (PInfo.PragmaCount > 0) {
814 if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)))
815 return PInfo.PragmaCount;
816 }
817
818 if (PInfo.PragmaFullUnroll && TripCount != 0) {
819 // Certain cases with UBSAN can cause trip count to be calculated as
820 // INT_MAX, Block full unrolling at a reasonable limit so that the compiler
821 // doesn't hang trying to unroll the loop. See PR77842
822 if (TripCount > PragmaUnrollFullMaxIterations) {
823 LLVM_DEBUG(dbgs() << "Won't unroll; trip count is too large\n");
824 return std::nullopt;
825 }
826
827 return TripCount;
828 }
829
830 if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&
831 MaxTripCount <= UP.MaxUpperBound)
832 return MaxTripCount;
833
834 // if didn't return until here, should continue to other priorties
835 return std::nullopt;
836}
837
838static std::optional<unsigned> shouldFullUnroll(
841 const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
843 assert(FullUnrollTripCount && "should be non-zero!");
844
845 if (FullUnrollTripCount > UP.FullUnrollMaxCount)
846 return std::nullopt;
847
848 // When computing the unrolled size, note that BEInsns are not replicated
849 // like the rest of the loop body.
850 if (UCE.getUnrolledLoopSize(UP) < UP.Threshold)
851 return FullUnrollTripCount;
852
853 // The loop isn't that small, but we still can fully unroll it if that
854 // helps to remove a significant number of instructions.
855 // To check that, run additional analysis on the loop.
856 if (std::optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
857 L, FullUnrollTripCount, DT, SE, EphValues, TTI,
860 unsigned Boost =
862 if (Cost->UnrolledCost < UP.Threshold * Boost / 100)
863 return FullUnrollTripCount;
864 }
865 return std::nullopt;
866}
867
868static std::optional<unsigned>
869shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
870 const UnrollCostEstimator UCE,
872
873 if (!TripCount)
874 return std::nullopt;
875
876 if (!UP.Partial) {
877 LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
878 << "-unroll-allow-partial not given\n");
879 return 0;
880 }
881 unsigned count = UP.Count;
882 if (count == 0)
883 count = TripCount;
884 if (UP.PartialThreshold != NoThreshold) {
885 // Reduce unroll count to be modulo of TripCount for partial unrolling.
887 count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
888 (LoopSize - UP.BEInsns);
889 if (count > UP.MaxCount)
890 count = UP.MaxCount;
891 while (count != 0 && TripCount % count != 0)
892 count--;
893 if (UP.AllowRemainder && count <= 1) {
894 // If there is no Count that is modulo of TripCount, set Count to
895 // largest power-of-two factor that satisfies the threshold limit.
896 // As we'll create fixup loop, do the type of unrolling only if
897 // remainder loop is allowed.
899 while (count != 0 &&
901 count >>= 1;
902 }
903 if (count < 2) {
904 count = 0;
905 }
906 } else {
907 count = TripCount;
908 }
909 if (count > UP.MaxCount)
910 count = UP.MaxCount;
911
912 LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
913
914 return count;
915}
916// Returns true if unroll count was set explicitly.
917// Calculates unroll count and writes it to UP.Count.
918// Unless IgnoreUser is true, will also use metadata and command-line options
919// that are specific to to the LoopUnroll pass (which, for instance, are
920// irrelevant for the LoopUnrollAndJam pass).
921// FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
922// many LoopUnroll-specific options. The shared functionality should be
923// refactored into it own function.
927 const SmallPtrSetImpl<const Value *> &EphValues,
928 OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount,
929 bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE,
931 TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
932
933 unsigned LoopSize = UCE.getRolledLoopSize();
934
935 const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
936 const bool PragmaFullUnroll = hasUnrollFullPragma(L);
937 const unsigned PragmaCount = unrollCountPragmaValue(L);
938 const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
939
940 const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
941 PragmaEnableUnroll || UserUnrollCount;
942
943 PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
944 PragmaEnableUnroll);
945 // Use an explicit peel count that has been specified for testing. In this
946 // case it's not permitted to also specify an explicit unroll count.
947 if (PP.PeelCount) {
948 if (UnrollCount.getNumOccurrences() > 0) {
949 report_fatal_error("Cannot specify both explicit peel count and "
950 "explicit unroll count", /*GenCrashDiag=*/false);
951 }
952 UP.Count = 1;
953 UP.Runtime = false;
954 return true;
955 }
956 // Check for explicit Count.
957 // 1st priority is unroll count set by "unroll-count" option.
958 // 2nd priority is unroll count set by pragma.
959 if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount,
960 MaxTripCount, UCE, UP)) {
961 UP.Count = *UnrollFactor;
962
963 if (UserUnrollCount || (PragmaCount > 0)) {
964 UP.AllowExpensiveTripCount = true;
965 UP.Force = true;
966 }
967 UP.Runtime |= (PragmaCount > 0);
968 return ExplicitUnroll;
969 } else {
970 if (ExplicitUnroll && TripCount != 0) {
971 // If the loop has an unrolling pragma, we want to be more aggressive with
972 // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
973 // value which is larger than the default limits.
974 UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
976 std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
977 }
978 }
979
980 // 3rd priority is exact full unrolling. This will eliminate all copies
981 // of some exit test.
982 UP.Count = 0;
983 if (TripCount) {
984 UP.Count = TripCount;
985 if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
986 TripCount, UCE, UP)) {
987 UP.Count = *UnrollFactor;
988 UseUpperBound = false;
989 return ExplicitUnroll;
990 }
991 }
992
993 // 4th priority is bounded unrolling.
994 // We can unroll by the upper bound amount if it's generally allowed or if
995 // we know that the loop is executed either the upper bound or zero times.
996 // (MaxOrZero unrolling keeps only the first loop test, so the number of
997 // loop tests remains the same compared to the non-unrolled version, whereas
998 // the generic upper bound unrolling keeps all but the last loop test so the
999 // number of loop tests goes up which may end up being worse on targets with
1000 // constrained branch predictor resources so is controlled by an option.)
1001 // In addition we only unroll small upper bounds.
1002 // Note that the cost of bounded unrolling is always strictly greater than
1003 // cost of exact full unrolling. As such, if we have an exact count and
1004 // found it unprofitable, we'll never chose to bounded unroll.
1005 if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&
1006 MaxTripCount <= UP.MaxUpperBound) {
1007 UP.Count = MaxTripCount;
1008 if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
1009 MaxTripCount, UCE, UP)) {
1010 UP.Count = *UnrollFactor;
1011 UseUpperBound = true;
1012 return ExplicitUnroll;
1013 }
1014 }
1015
1016 // 5th priority is loop peeling.
1017 computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold);
1018 if (PP.PeelCount) {
1019 UP.Runtime = false;
1020 UP.Count = 1;
1021 return ExplicitUnroll;
1022 }
1023
1024 // Before starting partial unrolling, set up.partial to true,
1025 // if user explicitly asked for unrolling
1026 if (TripCount)
1027 UP.Partial |= ExplicitUnroll;
1028
1029 // 6th priority is partial unrolling.
1030 // Try partial unroll only when TripCount could be statically calculated.
1031 if (auto UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP)) {
1032 UP.Count = *UnrollFactor;
1033
1034 if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
1035 UP.Count != TripCount)
1036 ORE->emit([&]() {
1038 "FullUnrollAsDirectedTooLarge",
1039 L->getStartLoc(), L->getHeader())
1040 << "Unable to fully unroll loop as directed by unroll pragma "
1041 "because "
1042 "unrolled size is too large.";
1043 });
1044
1045 if (UP.PartialThreshold != NoThreshold) {
1046 if (UP.Count == 0) {
1047 if (PragmaEnableUnroll)
1048 ORE->emit([&]() {
1050 "UnrollAsDirectedTooLarge",
1051 L->getStartLoc(), L->getHeader())
1052 << "Unable to unroll loop as directed by unroll(enable) "
1053 "pragma "
1054 "because unrolled size is too large.";
1055 });
1056 }
1057 }
1058 return ExplicitUnroll;
1059 }
1060 assert(TripCount == 0 &&
1061 "All cases when TripCount is constant should be covered here.");
1062 if (PragmaFullUnroll)
1063 ORE->emit([&]() {
1065 DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
1066 L->getStartLoc(), L->getHeader())
1067 << "Unable to fully unroll loop as directed by unroll(full) "
1068 "pragma "
1069 "because loop has a runtime trip count.";
1070 });
1071
1072 // 7th priority is runtime unrolling.
1073 // Don't unroll a runtime trip count loop when it is disabled.
1075 UP.Count = 0;
1076 return false;
1077 }
1078
1079 // Don't unroll a small upper bound loop unless user or TTI asked to do so.
1080 if (MaxTripCount && !UP.Force && MaxTripCount < UP.MaxUpperBound) {
1081 UP.Count = 0;
1082 return false;
1083 }
1084
1085 // Check if the runtime trip count is too small when profile is available.
1086 if (L->getHeader()->getParent()->hasProfileData()) {
1087 if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
1088 if (*ProfileTripCount < FlatLoopTripCountThreshold)
1089 return false;
1090 else
1091 UP.AllowExpensiveTripCount = true;
1092 }
1093 }
1094 UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
1095 if (!UP.Runtime) {
1096 LLVM_DEBUG(
1097 dbgs() << " will not try to unroll loop with runtime trip count "
1098 << "-unroll-runtime not given\n");
1099 UP.Count = 0;
1100 return false;
1101 }
1102 if (UP.Count == 0)
1104
1105 // Reduce unroll count to be the largest power-of-two factor of
1106 // the original count which satisfies the threshold limit.
1107 while (UP.Count != 0 &&
1109 UP.Count >>= 1;
1110
1111#ifndef NDEBUG
1112 unsigned OrigCount = UP.Count;
1113#endif
1114
1115 if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
1116 while (UP.Count != 0 && TripMultiple % UP.Count != 0)
1117 UP.Count >>= 1;
1118 LLVM_DEBUG(
1119 dbgs() << "Remainder loop is restricted (that could architecture "
1120 "specific or because the loop contains a convergent "
1121 "instruction), so unroll count must divide the trip "
1122 "multiple, "
1123 << TripMultiple << ". Reducing unroll count from " << OrigCount
1124 << " to " << UP.Count << ".\n");
1125
1126 using namespace ore;
1127
1128 if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder)
1129 ORE->emit([&]() {
1131 "DifferentUnrollCountFromDirected",
1132 L->getStartLoc(), L->getHeader())
1133 << "Unable to unroll loop the number of times directed by "
1134 "unroll_count pragma because remainder loop is restricted "
1135 "(that could architecture specific or because the loop "
1136 "contains a convergent instruction) and so must have an "
1137 "unroll "
1138 "count that divides the loop trip multiple of "
1139 << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
1140 << NV("UnrollCount", UP.Count) << " time(s).";
1141 });
1142 }
1143
1144 if (UP.Count > UP.MaxCount)
1145 UP.Count = UP.MaxCount;
1146
1147 if (MaxTripCount && UP.Count > MaxTripCount)
1148 UP.Count = MaxTripCount;
1149
1150 LLVM_DEBUG(dbgs() << " runtime unrolling with count: " << UP.Count
1151 << "\n");
1152 if (UP.Count < 2)
1153 UP.Count = 0;
1154 return ExplicitUnroll;
1155}
1156
1157static LoopUnrollResult
1161 ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
1162 bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV,
1163 std::optional<unsigned> ProvidedCount,
1164 std::optional<unsigned> ProvidedThreshold,
1165 std::optional<bool> ProvidedAllowPartial,
1166 std::optional<bool> ProvidedRuntime,
1167 std::optional<bool> ProvidedUpperBound,
1168 std::optional<bool> ProvidedAllowPeeling,
1169 std::optional<bool> ProvidedAllowProfileBasedPeeling,
1170 std::optional<unsigned> ProvidedFullUnrollMaxCount,
1171 AAResults *AA = nullptr) {
1172
1173 LLVM_DEBUG(dbgs() << "Loop Unroll: F["
1174 << L->getHeader()->getParent()->getName() << "] Loop %"
1175 << L->getHeader()->getName() << "\n");
1177 if (TM & TM_Disable)
1179
1180 // If this loop isn't forced to be unrolled, avoid unrolling it when the
1181 // parent loop has an explicit unroll-and-jam pragma. This is to prevent
1182 // automatic unrolling from interfering with the user requested
1183 // transformation.
1184 Loop *ParentL = L->getParentLoop();
1185 if (ParentL != nullptr &&
1188 LLVM_DEBUG(dbgs() << "Not unrolling loop since parent loop has"
1189 << " llvm.loop.unroll_and_jam.\n");
1191 }
1192
1193 // If this loop isn't forced to be unrolled, avoid unrolling it when the
1194 // loop has an explicit unroll-and-jam pragma. This is to prevent automatic
1195 // unrolling from interfering with the user requested transformation.
1198 LLVM_DEBUG(
1199 dbgs()
1200 << " Not unrolling loop since it has llvm.loop.unroll_and_jam.\n");
1202 }
1203
1204 if (!L->isLoopSimplifyForm()) {
1205 LLVM_DEBUG(
1206 dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
1208 }
1209
1210 // When automatic unrolling is disabled, do not unroll unless overridden for
1211 // this loop.
1212 if (OnlyWhenForced && !(TM & TM_Enable))
1214
1215 bool OptForSize = L->getHeader()->getParent()->hasOptSize();
1217 L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,
1218 ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1219 ProvidedFullUnrollMaxCount);
1221 L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true);
1222
1223 // Exit early if unrolling is disabled. For OptForSize, we pick the loop size
1224 // as threshold later on.
1225 if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) &&
1226 !OptForSize)
1228
1230 CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
1231
1232 UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns);
1233 if (!UCE.canUnroll()) {
1234 LLVM_DEBUG(dbgs() << " Loop not considered unrollable.\n");
1236 }
1237
1238 unsigned LoopSize = UCE.getRolledLoopSize();
1239 LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
1240
1241 // When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold
1242 // later), to (fully) unroll loops, if it does not increase code size.
1243 if (OptForSize)
1244 UP.Threshold = std::max(UP.Threshold, LoopSize + 1);
1245
1246 if (UCE.NumInlineCandidates != 0) {
1247 LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
1249 }
1250
1251 // Find the smallest exact trip count for any exit. This is an upper bound
1252 // on the loop trip count, but an exit at an earlier iteration is still
1253 // possible. An unroll by the smallest exact trip count guarantees that all
1254 // branches relating to at least one exit can be eliminated. This is unlike
1255 // the max trip count, which only guarantees that the backedge can be broken.
1256 unsigned TripCount = 0;
1257 unsigned TripMultiple = 1;
1258 SmallVector<BasicBlock *, 8> ExitingBlocks;
1259 L->getExitingBlocks(ExitingBlocks);
1260 for (BasicBlock *ExitingBlock : ExitingBlocks)
1261 if (unsigned TC = SE.getSmallConstantTripCount(L, ExitingBlock))
1262 if (!TripCount || TC < TripCount)
1263 TripCount = TripMultiple = TC;
1264
1265 if (!TripCount) {
1266 // If no exact trip count is known, determine the trip multiple of either
1267 // the loop latch or the single exiting block.
1268 // TODO: Relax for multiple exits.
1269 BasicBlock *ExitingBlock = L->getLoopLatch();
1270 if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1271 ExitingBlock = L->getExitingBlock();
1272 if (ExitingBlock)
1273 TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1274 }
1275
1276 // If the loop contains a convergent operation, the prelude we'd add
1277 // to do the first few instructions before we hit the unrolled loop
1278 // is unsafe -- it adds a control-flow dependency to the convergent
1279 // operation. Therefore restrict remainder loop (try unrolling without).
1280 //
1281 // TODO: This is somewhat conservative; we could allow the remainder if the
1282 // trip count is uniform.
1284
1285 // Try to find the trip count upper bound if we cannot find the exact trip
1286 // count.
1287 unsigned MaxTripCount = 0;
1288 bool MaxOrZero = false;
1289 if (!TripCount) {
1290 MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1291 MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1292 }
1293
1294 // computeUnrollCount() decides whether it is beneficial to use upper bound to
1295 // fully unroll the loop.
1296 bool UseUpperBound = false;
1297 bool IsCountSetExplicitly = computeUnrollCount(
1298 L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount,
1299 MaxOrZero, TripMultiple, UCE, UP, PP, UseUpperBound);
1300 if (!UP.Count)
1302
1304
1305 if (PP.PeelCount) {
1306 assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");
1307 LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()
1308 << " with iteration count " << PP.PeelCount << "!\n");
1309 ORE.emit([&]() {
1310 return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
1311 L->getHeader())
1312 << " peeled loop by " << ore::NV("PeelCount", PP.PeelCount)
1313 << " iterations";
1314 });
1315
1316 ValueToValueMapTy VMap;
1317 if (peelLoop(L, PP.PeelCount, LI, &SE, DT, &AC, PreserveLCSSA, VMap)) {
1318 simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI, nullptr);
1319 // If the loop was peeled, we already "used up" the profile information
1320 // we had, so we don't want to unroll or peel again.
1322 L->setLoopAlreadyUnrolled();
1324 }
1326 }
1327
1328 // Do not attempt partial/runtime unrolling in FullLoopUnrolling
1329 if (OnlyFullUnroll && (UP.Count < TripCount || UP.Count < MaxTripCount)) {
1330 LLVM_DEBUG(
1331 dbgs() << "Not attempting partial/runtime unroll in FullLoopUnroll.\n");
1333 }
1334
1335 // At this point, UP.Runtime indicates that run-time unrolling is allowed.
1336 // However, we only want to actually perform it if we don't know the trip
1337 // count and the unroll count doesn't divide the known trip multiple.
1338 // TODO: This decision should probably be pushed up into
1339 // computeUnrollCount().
1340 UP.Runtime &= TripCount == 0 && TripMultiple % UP.Count != 0;
1341
1342 // Save loop properties before it is transformed.
1343 MDNode *OrigLoopID = L->getLoopID();
1344
1345 // Unroll the loop.
1346 Loop *RemainderLoop = nullptr;
1348 ULO.Count = UP.Count;
1349 ULO.Force = UP.Force;
1352 ULO.Runtime = UP.Runtime;
1353 ULO.ForgetAllSCEV = ForgetAllSCEV;
1357 LoopUnrollResult UnrollResult = UnrollLoop(
1358 L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
1359 if (UnrollResult == LoopUnrollResult::Unmodified)
1361
1362 if (RemainderLoop) {
1363 std::optional<MDNode *> RemainderLoopID =
1366 if (RemainderLoopID)
1367 RemainderLoop->setLoopID(*RemainderLoopID);
1368 }
1369
1370 if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
1371 std::optional<MDNode *> NewLoopID =
1374 if (NewLoopID) {
1375 L->setLoopID(*NewLoopID);
1376
1377 // Do not setLoopAlreadyUnrolled if loop attributes have been specified
1378 // explicitly.
1379 return UnrollResult;
1380 }
1381 }
1382
1383 // If loop has an unroll count pragma or unrolled by explicitly set count
1384 // mark loop as unrolled to prevent unrolling beyond that requested.
1385 if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
1386 L->setLoopAlreadyUnrolled();
1387
1388 return UnrollResult;
1389}
1390
1391namespace {
1392
1393class LoopUnroll : public LoopPass {
1394public:
1395 static char ID; // Pass ID, replacement for typeid
1396
1397 int OptLevel;
1398
1399 /// If false, use a cost model to determine whether unrolling of a loop is
1400 /// profitable. If true, only loops that explicitly request unrolling via
1401 /// metadata are considered. All other loops are skipped.
1402 bool OnlyWhenForced;
1403
1404 /// If false, when SCEV is invalidated, only forget everything in the
1405 /// top-most loop (call forgetTopMostLoop), of the loop being processed.
1406 /// Otherwise, forgetAllLoops and rebuild when needed next.
1407 bool ForgetAllSCEV;
1408
1409 std::optional<unsigned> ProvidedCount;
1410 std::optional<unsigned> ProvidedThreshold;
1411 std::optional<bool> ProvidedAllowPartial;
1412 std::optional<bool> ProvidedRuntime;
1413 std::optional<bool> ProvidedUpperBound;
1414 std::optional<bool> ProvidedAllowPeeling;
1415 std::optional<bool> ProvidedAllowProfileBasedPeeling;
1416 std::optional<unsigned> ProvidedFullUnrollMaxCount;
1417
1418 LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
1419 bool ForgetAllSCEV = false,
1420 std::optional<unsigned> Threshold = std::nullopt,
1421 std::optional<unsigned> Count = std::nullopt,
1422 std::optional<bool> AllowPartial = std::nullopt,
1423 std::optional<bool> Runtime = std::nullopt,
1424 std::optional<bool> UpperBound = std::nullopt,
1425 std::optional<bool> AllowPeeling = std::nullopt,
1426 std::optional<bool> AllowProfileBasedPeeling = std::nullopt,
1427 std::optional<unsigned> ProvidedFullUnrollMaxCount = std::nullopt)
1428 : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1429 ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
1430 ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1431 ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
1432 ProvidedAllowPeeling(AllowPeeling),
1433 ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),
1434 ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {
1436 }
1437
1438 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1439 if (skipLoop(L))
1440 return false;
1441
1442 Function &F = *L->getHeader()->getParent();
1443
1444 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1445 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1446 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1447 const TargetTransformInfo &TTI =
1448 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1449 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1450 // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1451 // pass. Function analyses need to be preserved across loop transformations
1452 // but ORE cannot be preserved (see comment before the pass definition).
1454 bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1455
1457 L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
1458 /*OnlyFullUnroll*/ false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount,
1459 ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
1460 ProvidedUpperBound, ProvidedAllowPeeling,
1461 ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount);
1462
1463 if (Result == LoopUnrollResult::FullyUnrolled)
1464 LPM.markLoopAsDeleted(*L);
1465
1466 return Result != LoopUnrollResult::Unmodified;
1467 }
1468
1469 /// This transformation requires natural loop information & requires that
1470 /// loop preheaders be inserted into the CFG...
1471 void getAnalysisUsage(AnalysisUsage &AU) const override {
1474 // FIXME: Loop passes are required to preserve domtree, and for now we just
1475 // recreate dom info if anything gets unrolled.
1477 }
1478};
1479
1480} // end anonymous namespace
1481
1482char LoopUnroll::ID = 0;
1483
1484INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1488INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1489
1490Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1491 bool ForgetAllSCEV, int Threshold, int Count,
1492 int AllowPartial, int Runtime, int UpperBound,
1493 int AllowPeeling) {
1494 // TODO: It would make more sense for this function to take the optionals
1495 // directly, but that's dangerous since it would silently break out of tree
1496 // callers.
1497 return new LoopUnroll(
1498 OptLevel, OnlyWhenForced, ForgetAllSCEV,
1499 Threshold == -1 ? std::nullopt : std::optional<unsigned>(Threshold),
1500 Count == -1 ? std::nullopt : std::optional<unsigned>(Count),
1501 AllowPartial == -1 ? std::nullopt : std::optional<bool>(AllowPartial),
1502 Runtime == -1 ? std::nullopt : std::optional<bool>(Runtime),
1503 UpperBound == -1 ? std::nullopt : std::optional<bool>(UpperBound),
1504 AllowPeeling == -1 ? std::nullopt : std::optional<bool>(AllowPeeling));
1505}
1506
1509 LPMUpdater &Updater) {
1510 // For the new PM, we can't use OptimizationRemarkEmitter as an analysis
1511 // pass. Function analyses need to be preserved across loop transformations
1512 // but ORE cannot be preserved (see comment before the pass definition).
1513 OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
1514
1515 // Keep track of the previous loop structure so we can identify new loops
1516 // created by unrolling.
1517 Loop *ParentL = L.getParentLoop();
1518 SmallPtrSet<Loop *, 4> OldLoops;
1519 if (ParentL)
1520 OldLoops.insert(ParentL->begin(), ParentL->end());
1521 else
1522 OldLoops.insert(AR.LI.begin(), AR.LI.end());
1523
1524 std::string LoopName = std::string(L.getName());
1525
1526 bool Changed =
1527 tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE,
1528 /*BFI*/ nullptr, /*PSI*/ nullptr,
1529 /*PreserveLCSSA*/ true, OptLevel, /*OnlyFullUnroll*/ true,
1530 OnlyWhenForced, ForgetSCEV, /*Count*/ std::nullopt,
1531 /*Threshold*/ std::nullopt, /*AllowPartial*/ false,
1532 /*Runtime*/ false, /*UpperBound*/ false,
1533 /*AllowPeeling*/ true,
1534 /*AllowProfileBasedPeeling*/ false,
1535 /*FullUnrollMaxCount*/ std::nullopt) !=
1537 if (!Changed)
1538 return PreservedAnalyses::all();
1539
1540 // The parent must not be damaged by unrolling!
1541#ifndef NDEBUG
1542 if (ParentL)
1543 ParentL->verifyLoop();
1544#endif
1545
1546 // Unrolling can do several things to introduce new loops into a loop nest:
1547 // - Full unrolling clones child loops within the current loop but then
1548 // removes the current loop making all of the children appear to be new
1549 // sibling loops.
1550 //
1551 // When a new loop appears as a sibling loop after fully unrolling,
1552 // its nesting structure has fundamentally changed and we want to revisit
1553 // it to reflect that.
1554 //
1555 // When unrolling has removed the current loop, we need to tell the
1556 // infrastructure that it is gone.
1557 //
1558 // Finally, we support a debugging/testing mode where we revisit child loops
1559 // as well. These are not expected to require further optimizations as either
1560 // they or the loop they were cloned from have been directly visited already.
1561 // But the debugging mode allows us to check this assumption.
1562 bool IsCurrentLoopValid = false;
1563 SmallVector<Loop *, 4> SibLoops;
1564 if (ParentL)
1565 SibLoops.append(ParentL->begin(), ParentL->end());
1566 else
1567 SibLoops.append(AR.LI.begin(), AR.LI.end());
1568 erase_if(SibLoops, [&](Loop *SibLoop) {
1569 if (SibLoop == &L) {
1570 IsCurrentLoopValid = true;
1571 return true;
1572 }
1573
1574 // Otherwise erase the loop from the list if it was in the old loops.
1575 return OldLoops.contains(SibLoop);
1576 });
1577 Updater.addSiblingLoops(SibLoops);
1578
1579 if (!IsCurrentLoopValid) {
1580 Updater.markLoopAsDeleted(L, LoopName);
1581 } else {
1582 // We can only walk child loops if the current loop remained valid.
1584 // Walk *all* of the child loops.
1585 SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
1586 Updater.addChildLoops(ChildLoops);
1587 }
1588 }
1589
1591}
1592
1595 auto &LI = AM.getResult<LoopAnalysis>(F);
1596 // There are no loops in the function. Return before computing other expensive
1597 // analyses.
1598 if (LI.empty())
1599 return PreservedAnalyses::all();
1600 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1601 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1602 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1603 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1605 AAResults &AA = AM.getResult<AAManager>(F);
1606
1607 LoopAnalysisManager *LAM = nullptr;
1608 if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
1609 LAM = &LAMProxy->getManager();
1610
1611 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
1612 ProfileSummaryInfo *PSI =
1613 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
1614 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
1615 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
1616
1617 bool Changed = false;
1618
1619 // The unroller requires loops to be in simplified form, and also needs LCSSA.
1620 // Since simplification may add new inner loops, it has to run before the
1621 // legality and profitability checks. This means running the loop unroller
1622 // will simplify all loops, regardless of whether anything end up being
1623 // unrolled.
1624 for (const auto &L : LI) {
1625 Changed |=
1626 simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
1627 Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1628 }
1629
1630 // Add the loop nests in the reverse order of LoopInfo. See method
1631 // declaration.
1633 appendLoopsToWorklist(LI, Worklist);
1634
1635 while (!Worklist.empty()) {
1636 // Because the LoopInfo stores the loops in RPO, we walk the worklist
1637 // from back to front so that we work forward across the CFG, which
1638 // for unrolling is only needed to get optimization remarks emitted in
1639 // a forward order.
1640 Loop &L = *Worklist.pop_back_val();
1641#ifndef NDEBUG
1642 Loop *ParentL = L.getParentLoop();
1643#endif
1644
1645 // Check if the profile summary indicates that the profiled application
1646 // has a huge working set size, in which case we disable peeling to avoid
1647 // bloating it further.
1648 std::optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
1649 if (PSI && PSI->hasHugeWorkingSetSize())
1650 LocalAllowPeeling = false;
1651 std::string LoopName = std::string(L.getName());
1652 // The API here is quite complex to call and we allow to select some
1653 // flavors of unrolling during construction time (by setting UnrollOpts).
1655 &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
1656 /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*OnlyFullUnroll*/ false,
1657 UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV,
1658 /*Count*/ std::nullopt,
1659 /*Threshold*/ std::nullopt, UnrollOpts.AllowPartial,
1660 UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling,
1661 UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount,
1662 &AA);
1663 Changed |= Result != LoopUnrollResult::Unmodified;
1664
1665 // The parent must not be damaged by unrolling!
1666#ifndef NDEBUG
1667 if (Result != LoopUnrollResult::Unmodified && ParentL)
1668 ParentL->verifyLoop();
1669#endif
1670
1671 // Clear any cached analysis results for L if we removed it completely.
1672 if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1673 LAM->clear(L, LoopName);
1674 }
1675
1676 if (!Changed)
1677 return PreservedAnalyses::all();
1678
1680}
1681
1683 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1684 static_cast<PassInfoMixin<LoopUnrollPass> *>(this)->printPipeline(
1685 OS, MapClassName2PassName);
1686 OS << '<';
1687 if (UnrollOpts.AllowPartial != std::nullopt)
1688 OS << (*UnrollOpts.AllowPartial ? "" : "no-") << "partial;";
1689 if (UnrollOpts.AllowPeeling != std::nullopt)
1690 OS << (*UnrollOpts.AllowPeeling ? "" : "no-") << "peeling;";
1691 if (UnrollOpts.AllowRuntime != std::nullopt)
1692 OS << (*UnrollOpts.AllowRuntime ? "" : "no-") << "runtime;";
1693 if (UnrollOpts.AllowUpperBound != std::nullopt)
1694 OS << (*UnrollOpts.AllowUpperBound ? "" : "no-") << "upperbound;";
1695 if (UnrollOpts.AllowProfileBasedPeeling != std::nullopt)
1696 OS << (*UnrollOpts.AllowProfileBasedPeeling ? "" : "no-")
1697 << "profile-peeling;";
1698 if (UnrollOpts.FullUnrollMaxCount != std::nullopt)
1699 OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ';';
1700 OS << 'O' << UnrollOpts.OptLevel;
1701 OS << '>';
1702}
Rewrite undef for PHI
static bool isEqual(const Function &Caller, const Function &Callee)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines DenseMapInfo traits for DenseMap.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
This header provides classes for managing per-loop analyses.
loops
Definition: LoopInfo.cpp:1221
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
static cl::opt< unsigned > UnrollThresholdDefault("unroll-threshold-default", cl::init(150), cl::Hidden, cl::desc("Default threshold (max size of unrolled " "loop), used in all but O3 optimizations"))
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static cl::opt< unsigned > UnrollOptSizeThreshold("unroll-optsize-threshold", cl::init(0), cl::Hidden, cl::desc("The cost threshold for loop unrolling when optimizing for " "size"))
static bool hasUnrollFullPragma(const Loop *L)
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
static unsigned unrollCountPragmaValue(const Loop *L)
static bool hasUnrollEnablePragma(const Loop *L)
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static std::optional< unsigned > shouldFullUnroll(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
static std::optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize, unsigned MaxIterationsCountToAnalyze)
Figure out if the loop is worth full unrolling.
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static std::optional< unsigned > shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
static cl::opt< unsigned > PragmaUnrollFullMaxIterations("pragma-unroll-full-max-iterations", cl::init(1 '000 '000), cl::Hidden, cl::desc("Maximum allowed iterations to unroll under pragma unroll full."))
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
static std::optional< unsigned > shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo, const unsigned TripMultiple, const unsigned TripCount, unsigned MaxTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV, std::optional< unsigned > ProvidedCount, std::optional< unsigned > ProvidedThreshold, std::optional< bool > ProvidedAllowPartial, std::optional< bool > ProvidedRuntime, std::optional< bool > ProvidedUpperBound, std::optional< bool > ProvidedAllowPeeling, std::optional< bool > ProvidedAllowProfileBasedPeeling, std::optional< unsigned > ProvidedFullUnrollMaxCount, AAResults *AA=nullptr)
static bool hasRuntimeUnrollDisablePragma(const Loop *L)
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
static cl::opt< unsigned > UnrollThresholdAggressive("unroll-threshold-aggressive", cl::init(300), cl::Hidden, cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) " "optimizations"))
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of " "iterations when checking full unroll profitability"))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
Machine Trace Metrics
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
LoopAnalysisManager LAM
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
A manager for alias analyses.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
void clear(IRUnitT &IR, llvm::StringRef Name)
Clear any cached analysis results for a single unit of IR.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:429
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:240
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:710
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:567
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:72
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void addChildLoops(ArrayRef< Loop * > NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop.
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
void addSiblingLoops(ArrayRef< Loop * > NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop.
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:111
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
void verifyLoop() const
Verify loop structure.
iterator end() const
iterator begin() const
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
iterator end() const
iterator begin() const
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:526
Metadata node.
Definition: Metadata.h:1073
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1434
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1440
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:692
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
bool empty() const
Determine if the PriorityWorklist is empty or not.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCo...
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:458
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:81
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Get target-customized preferences for the generic loop unrolling transformation.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Produce an estimate of the unrolled cost of the specified loop.
Definition: UnrollLoop.h:130
ConvergenceKind Convergence
Definition: UnrollLoop.h:136
uint64_t getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP, unsigned CountOverwrite=0) const
Returns loop size estimation for unrolled loop, given the unrolling configuration specified by UP.
bool canUnroll() const
Whether it is legal to unroll this loop.
UnrollCostEstimator(const Loop *L, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
uint64_t getRolledLoopSize() const
Definition: UnrollLoop.h:146
LLVM Value Representation.
Definition: Value.h:74
int getNumOccurrences() const
Definition: CommandLine.h:399
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:213
iterator find(const_arg_type_t< ValueT > V)
Definition: DenseSet.h:187
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:850
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, AAResults *AA=nullptr)
Perform some cleanup and simplifications on loops after unrolling.
Definition: LoopUnroll.cpp:342
void initializeLoopUnrollPass(PassRegistry &)
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, AssumptionCache *AC=nullptr, unsigned Threshold=UINT_MAX)
Definition: LoopPeel.cpp:536
auto successors(const MachineBasicBlock *BB)
@ Runtime
Detect stack use after return if not disabled runtime with (ASAN_OPTIONS=detect_stack_use_after_retur...
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:465
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:263
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
char & LCSSAID
Definition: LCSSA.cpp:542
Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1952
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:870
CallBase * getLoopConvergenceHeart(const Loop *TheLoop)
Find the convergence heart of the loop.
Definition: LoopInfo.cpp:1132
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
Definition: LoopUtils.cpp:373
cl::opt< bool > ForgetSCEVInLoopUnroll
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
cl::opt< unsigned > SCEVCheapExpansionBudget
TransformationMode hasUnrollTransformation(const Loop *L)
Definition: LoopUtils.cpp:352
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:56
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
@ Unmodified
The loop was not modified.
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:141
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:44
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:277
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
Definition: LoopUtils.h:294
@ TM_Disable
The transformation should not be applied.
Definition: LoopUtils.h:286
@ TM_Enable
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:283
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1938
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1814
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1873
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:47
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
const char *const LLVMLoopUnrollFollowupUnrolled
Definition: UnrollLoop.h:45
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr, AAResults *AA=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:458
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &VMap)
VMap is the value-map that maps instructions from the original loop to instructions in the last peele...
Definition: LoopPeel.cpp:914
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:33
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:71
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:52
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
bool OnlyWhenForced
If false, use a cost model to determine whether unrolling of a loop is profitable.
const bool ForgetSCEV
If true, forget all loops when unrolling.
std::optional< unsigned > FullUnrollMaxCount
std::optional< bool > AllowPartial
std::optional< bool > AllowRuntime
std::optional< bool > AllowProfileBasedPeeling
std::optional< bool > AllowPeeling
std::optional< bool > AllowUpperBound
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:69
bool PeelProfiledIterations
Allow peeling basing on profile.
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
Parameters that control the generic loop unrolling transformation.
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned Threshold
The cost threshold for the unrolled loop.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
bool RuntimeUnrollMultiExit
Allow runtime unrolling multi-exit loops.
unsigned SCEVExpansionBudget
Don't allow runtime unrolling if expanding the trip count takes more than SCEVExpansionBudget.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned MaxUpperBound
Set the maximum upper bound of trip count.
const Instruction * Heart
Definition: UnrollLoop.h:77
unsigned SCEVExpansionBudget
Definition: UnrollLoop.h:78