//===----------------------------------------------------------------------===//
// Source: LLVM 22.0.0git — IndVarSimplify.cpp (reconstructed from the
// generated documentation rendering of this file).
//===----------------------------------------------------------------------===//
//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//
25
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>
79using namespace llvm;
80using namespace PatternMatch;
81using namespace SCEVPatternMatch;
82
83#define DEBUG_TYPE "indvars"
84
85STATISTIC(NumWidened , "Number of indvars widened");
86STATISTIC(NumReplaced , "Number of exit values replaced");
87STATISTIC(NumLFTR , "Number of loop exit tests replaced");
88STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
89STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
90
92 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
93 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
95 clEnumValN(NeverRepl, "never", "never replace exit value"),
97 "only replace exit value when the cost is cheap"),
99 UnusedIndVarInLoop, "unusedindvarinloop",
100 "only replace exit value when it is an unused "
101 "induction variable in the loop and has cheap replacement cost"),
102 clEnumValN(NoHardUse, "noharduse",
103 "only replace exit values when loop def likely dead"),
104 clEnumValN(AlwaysRepl, "always",
105 "always replace exit value whenever possible")));
106
108 "indvars-post-increment-ranges", cl::Hidden,
109 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
110 cl::init(true));
111
112static cl::opt<bool>
113DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
114 cl::desc("Disable Linear Function Test Replace optimization"));
115
116static cl::opt<bool>
117LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
118 cl::desc("Predicate conditions in read only loops"));
119
120static cl::opt<bool>
121AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
122 cl::desc("Allow widening of indvars to eliminate s/zext"));
123
124namespace {
125
126class IndVarSimplify {
127 LoopInfo *LI;
128 ScalarEvolution *SE;
129 DominatorTree *DT;
130 const DataLayout &DL;
133 std::unique_ptr<MemorySSAUpdater> MSSAU;
134
136 bool WidenIndVars;
137
138 bool RunUnswitching = false;
139
140 bool handleFloatingPointIV(Loop *L, PHINode *PH);
141 bool rewriteNonIntegerIVs(Loop *L);
142
143 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
144 /// Try to improve our exit conditions by converting condition from signed
145 /// to unsigned or rotating computation out of the loop.
146 /// (See inline comment about why this is duplicated from simplifyAndExtend)
147 bool canonicalizeExitCondition(Loop *L);
148 /// Try to eliminate loop exits based on analyzeable exit counts
149 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
150 /// Try to form loop invariant tests for loop exits by changing how many
151 /// iterations of the loop run when that is unobservable.
152 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
153
154 bool rewriteFirstIterationLoopExitValues(Loop *L);
155
156 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
157 const SCEV *ExitCount,
158 PHINode *IndVar, SCEVExpander &Rewriter);
159
160 bool sinkUnusedInvariants(Loop *L);
161
162public:
163 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
164 const DataLayout &DL, TargetLibraryInfo *TLI,
165 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
166 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
167 WidenIndVars(WidenIndVars) {
168 if (MSSA)
169 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
170 }
171
172 bool run(Loop *L);
173
174 bool runUnswitching() const { return RunUnswitching; }
175};
176
177} // end anonymous namespace
178
179//===----------------------------------------------------------------------===//
180// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
181//===----------------------------------------------------------------------===//
182
183/// Convert APF to an integer, if possible.
184static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
185 bool isExact = false;
186 // See if we can convert this to an int64_t
187 uint64_t UIntVal;
188 if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
189 APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
190 !isExact)
191 return false;
192 IntVal = UIntVal;
193 return true;
194}
195
196/// If the loop has floating induction variable then insert corresponding
197/// integer induction variable if possible.
198/// For example,
199/// for(double i = 0; i < 10000; ++i)
200/// bar(i)
201/// is converted into
202/// for(int i = 0; i < 10000; ++i)
203/// bar((double)i);
204bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
205 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
206 unsigned BackEdge = IncomingEdge^1;
207
208 // Check incoming value.
209 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
210
211 int64_t InitValue;
212 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
213 return false;
214
215 // Check IV increment. Reject this PN if increment operation is not
216 // an add or increment value can not be represented by an integer.
217 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
218 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
219
220 // If this is not an add of the PHI with a constantfp, or if the constant fp
221 // is not an integer, bail out.
222 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
223 int64_t IncValue;
224 if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
225 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
226 return false;
227
228 // Check Incr uses. One user is PN and the other user is an exit condition
229 // used by the conditional terminator.
230 Value::user_iterator IncrUse = Incr->user_begin();
231 Instruction *U1 = cast<Instruction>(*IncrUse++);
232 if (IncrUse == Incr->user_end()) return false;
233 Instruction *U2 = cast<Instruction>(*IncrUse++);
234 if (IncrUse != Incr->user_end()) return false;
235
236 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
237 // only used by a branch, we can't transform it.
238 FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
239 if (!Compare)
240 Compare = dyn_cast<FCmpInst>(U2);
241 if (!Compare || !Compare->hasOneUse() ||
242 !isa<BranchInst>(Compare->user_back()))
243 return false;
244
245 BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
246
247 // We need to verify that the branch actually controls the iteration count
248 // of the loop. If not, the new IV can overflow and no one will notice.
249 // The branch block must be in the loop and one of the successors must be out
250 // of the loop.
251 assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
252 if (!L->contains(TheBr->getParent()) ||
253 (L->contains(TheBr->getSuccessor(0)) &&
254 L->contains(TheBr->getSuccessor(1))))
255 return false;
256
257 // If it isn't a comparison with an integer-as-fp (the exit value), we can't
258 // transform it.
259 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
260 int64_t ExitValue;
261 if (ExitValueVal == nullptr ||
262 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
263 return false;
264
265 // Find new predicate for integer comparison.
267 switch (Compare->getPredicate()) {
268 default: return false; // Unknown comparison.
270 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
272 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
274 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
276 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
278 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
280 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
281 }
282
283 // We convert the floating point induction variable to a signed i32 value if
284 // we can. This is only safe if the comparison will not overflow in a way
285 // that won't be trapped by the integer equivalent operations. Check for this
286 // now.
287 // TODO: We could use i64 if it is native and the range requires it.
288
289 // The start/stride/exit values must all fit in signed i32.
290 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
291 return false;
292
293 // If not actually striding (add x, 0.0), avoid touching the code.
294 if (IncValue == 0)
295 return false;
296
297 // Positive and negative strides have different safety conditions.
298 if (IncValue > 0) {
299 // If we have a positive stride, we require the init to be less than the
300 // exit value.
301 if (InitValue >= ExitValue)
302 return false;
303
304 uint32_t Range = uint32_t(ExitValue-InitValue);
305 // Check for infinite loop, either:
306 // while (i <= Exit) or until (i > Exit)
307 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
308 if (++Range == 0) return false; // Range overflows.
309 }
310
311 unsigned Leftover = Range % uint32_t(IncValue);
312
313 // If this is an equality comparison, we require that the strided value
314 // exactly land on the exit value, otherwise the IV condition will wrap
315 // around and do things the fp IV wouldn't.
316 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
317 Leftover != 0)
318 return false;
319
320 // If the stride would wrap around the i32 before exiting, we can't
321 // transform the IV.
322 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
323 return false;
324 } else {
325 // If we have a negative stride, we require the init to be greater than the
326 // exit value.
327 if (InitValue <= ExitValue)
328 return false;
329
330 uint32_t Range = uint32_t(InitValue-ExitValue);
331 // Check for infinite loop, either:
332 // while (i >= Exit) or until (i < Exit)
333 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
334 if (++Range == 0) return false; // Range overflows.
335 }
336
337 unsigned Leftover = Range % uint32_t(-IncValue);
338
339 // If this is an equality comparison, we require that the strided value
340 // exactly land on the exit value, otherwise the IV condition will wrap
341 // around and do things the fp IV wouldn't.
342 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
343 Leftover != 0)
344 return false;
345
346 // If the stride would wrap around the i32 before exiting, we can't
347 // transform the IV.
348 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
349 return false;
350 }
351
352 IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
353
354 // Insert new integer induction variable.
355 PHINode *NewPHI =
356 PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
357 NewPHI->addIncoming(ConstantInt::getSigned(Int32Ty, InitValue),
358 PN->getIncomingBlock(IncomingEdge));
359 NewPHI->setDebugLoc(PN->getDebugLoc());
360
361 Instruction *NewAdd = BinaryOperator::CreateAdd(
362 NewPHI, ConstantInt::getSigned(Int32Ty, IncValue),
363 Incr->getName() + ".int", Incr->getIterator());
364 NewAdd->setDebugLoc(Incr->getDebugLoc());
365 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
366
367 ICmpInst *NewCompare = new ICmpInst(
368 TheBr->getIterator(), NewPred, NewAdd,
369 ConstantInt::getSigned(Int32Ty, ExitValue), Compare->getName());
370 NewCompare->setDebugLoc(Compare->getDebugLoc());
371
372 // In the following deletions, PN may become dead and may be deleted.
373 // Use a WeakTrackingVH to observe whether this happens.
374 WeakTrackingVH WeakPH = PN;
375
376 // Delete the old floating point exit comparison. The branch starts using the
377 // new comparison.
378 NewCompare->takeName(Compare);
379 Compare->replaceAllUsesWith(NewCompare);
380 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
381
382 // Delete the old floating point increment.
383 Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
384 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
385
386 // If the FP induction variable still has uses, this is because something else
387 // in the loop uses its value. In order to canonicalize the induction
388 // variable, we chose to eliminate the IV and rewrite it in terms of an
389 // int->fp cast.
390 //
391 // We give preference to sitofp over uitofp because it is faster on most
392 // platforms.
393 if (WeakPH) {
394 Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
395 PN->getParent()->getFirstInsertionPt());
396 Conv->setDebugLoc(PN->getDebugLoc());
397 PN->replaceAllUsesWith(Conv);
398 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
399 }
400 return true;
401}
402
403bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
404 // First step. Check to see if there are any floating-point recurrences.
405 // If there are, change them into integer recurrences, permitting analysis by
406 // the SCEV routines.
407 BasicBlock *Header = L->getHeader();
408
410
411 bool Changed = false;
412 for (WeakTrackingVH &PHI : PHIs)
413 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
414 Changed |= handleFloatingPointIV(L, PN);
415
416 // If the loop previously had floating-point IV, ScalarEvolution
417 // may not have been able to compute a trip count. Now that we've done some
418 // re-writing, the trip count may be computable.
419 if (Changed)
420 SE->forgetLoop(L);
421 return Changed;
422}
423
424//===---------------------------------------------------------------------===//
425// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
426// they will exit at the first iteration.
427//===---------------------------------------------------------------------===//
428
429/// Check to see if this loop has loop invariant conditions which lead to loop
430/// exits. If so, we know that if the exit path is taken, it is at the first
431/// loop iteration. This lets us predict exit values of PHI nodes that live in
432/// loop header.
433bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
434 // Verify the input to the pass is already in LCSSA form.
435 assert(L->isLCSSAForm(*DT));
436
438 L->getUniqueExitBlocks(ExitBlocks);
439
440 bool MadeAnyChanges = false;
441 for (auto *ExitBB : ExitBlocks) {
442 // If there are no more PHI nodes in this exit block, then no more
443 // values defined inside the loop are used on this path.
444 for (PHINode &PN : ExitBB->phis()) {
445 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
446 IncomingValIdx != E; ++IncomingValIdx) {
447 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
448
449 // Can we prove that the exit must run on the first iteration if it
450 // runs at all? (i.e. early exits are fine for our purposes, but
451 // traces which lead to this exit being taken on the 2nd iteration
452 // aren't.) Note that this is about whether the exit branch is
453 // executed, not about whether it is taken.
454 if (!L->getLoopLatch() ||
455 !DT->dominates(IncomingBB, L->getLoopLatch()))
456 continue;
457
458 // Get condition that leads to the exit path.
459 auto *TermInst = IncomingBB->getTerminator();
460
461 Value *Cond = nullptr;
462 if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
463 // Must be a conditional branch, otherwise the block
464 // should not be in the loop.
465 Cond = BI->getCondition();
466 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
467 Cond = SI->getCondition();
468 else
469 continue;
470
471 if (!L->isLoopInvariant(Cond))
472 continue;
473
474 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
475
476 // Only deal with PHIs in the loop header.
477 if (!ExitVal || ExitVal->getParent() != L->getHeader())
478 continue;
479
480 // If ExitVal is a PHI on the loop header, then we know its
481 // value along this exit because the exit can only be taken
482 // on the first iteration.
483 auto *LoopPreheader = L->getLoopPreheader();
484 assert(LoopPreheader && "Invalid loop");
485 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
486 if (PreheaderIdx != -1) {
487 assert(ExitVal->getParent() == L->getHeader() &&
488 "ExitVal must be in loop header");
489 MadeAnyChanges = true;
490 PN.setIncomingValue(IncomingValIdx,
491 ExitVal->getIncomingValue(PreheaderIdx));
492 SE->forgetValue(&PN);
493 }
494 }
495 }
496 }
497 return MadeAnyChanges;
498}
499
500//===----------------------------------------------------------------------===//
501// IV Widening - Extend the width of an IV to cover its widest uses.
502//===----------------------------------------------------------------------===//
503
504/// Update information about the induction variable that is extended by this
505/// sign or zero extend operation. This is used to determine the final width of
506/// the IV before actually widening it.
507static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
508 ScalarEvolution *SE,
509 const TargetTransformInfo *TTI) {
510 bool IsSigned = Cast->getOpcode() == Instruction::SExt;
511 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
512 return;
513
514 Type *Ty = Cast->getType();
515 uint64_t Width = SE->getTypeSizeInBits(Ty);
516 if (!Cast->getDataLayout().isLegalInteger(Width))
517 return;
518
519 // Check that `Cast` actually extends the induction variable (we rely on this
520 // later). This takes care of cases where `Cast` is extending a truncation of
521 // the narrow induction variable, and thus can end up being narrower than the
522 // "narrow" induction variable.
523 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
524 if (NarrowIVWidth >= Width)
525 return;
526
527 // Cast is either an sext or zext up to this point.
528 // We should not widen an indvar if arithmetics on the wider indvar are more
529 // expensive than those on the narrower indvar. We check only the cost of ADD
530 // because at least an ADD is required to increment the induction variable. We
531 // could compute more comprehensively the cost of all instructions on the
532 // induction variable when necessary.
533 if (TTI &&
534 TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
535 TTI->getArithmeticInstrCost(Instruction::Add,
536 Cast->getOperand(0)->getType())) {
537 return;
538 }
539
540 if (!WI.WidestNativeType ||
541 Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
543 WI.IsSigned = IsSigned;
544 return;
545 }
546
547 // We extend the IV to satisfy the sign of its user(s), or 'signed'
548 // if there are multiple users with both sign- and zero extensions,
549 // in order not to introduce nondeterministic behaviour based on the
550 // unspecified order of a PHI nodes' users-iterator.
551 WI.IsSigned |= IsSigned;
552}
553
554//===----------------------------------------------------------------------===//
555// Live IV Reduction - Minimize IVs live across the loop.
556//===----------------------------------------------------------------------===//
557
558//===----------------------------------------------------------------------===//
559// Simplification of IV users based on SCEV evaluation.
560//===----------------------------------------------------------------------===//
561
562namespace {
563
564class IndVarSimplifyVisitor : public IVVisitor {
565 ScalarEvolution *SE;
567 PHINode *IVPhi;
568
569public:
570 WideIVInfo WI;
571
572 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
574 const DominatorTree *DTree)
575 : SE(SCEV), TTI(TTI), IVPhi(IV) {
576 DT = DTree;
577 WI.NarrowIV = IVPhi;
578 }
579
580 // Implement the interface used by simplifyUsersOfIV.
581 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
582};
583
584} // end anonymous namespace
585
586/// Iteratively perform simplification on a worklist of IV users. Each
587/// successive simplification may push more users which may themselves be
588/// candidates for simplification.
589///
590/// Sign/Zero extend elimination is interleaved with IV simplification.
591bool IndVarSimplify::simplifyAndExtend(Loop *L,
592 SCEVExpander &Rewriter,
593 LoopInfo *LI) {
595
596 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
597 L->getBlocks()[0]->getModule(), Intrinsic::experimental_guard);
598 bool HasGuards = GuardDecl && !GuardDecl->use_empty();
599
601 llvm::make_pointer_range(L->getHeader()->phis()));
602
603 // Each round of simplification iterates through the SimplifyIVUsers worklist
604 // for all current phis, then determines whether any IVs can be
605 // widened. Widening adds new phis to LoopPhis, inducing another round of
606 // simplification on the wide IVs.
607 bool Changed = false;
608 while (!LoopPhis.empty()) {
609 // Evaluate as many IV expressions as possible before widening any IVs. This
610 // forces SCEV to set no-wrap flags before evaluating sign/zero
611 // extension. The first time SCEV attempts to normalize sign/zero extension,
612 // the result becomes final. So for the most predictable results, we delay
613 // evaluation of sign/zero extend evaluation until needed, and avoid running
614 // other SCEV based analysis prior to simplifyAndExtend.
615 do {
616 PHINode *CurrIV = LoopPhis.pop_back_val();
617
618 // Information about sign/zero extensions of CurrIV.
619 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
620
621 const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
622 Rewriter, &Visitor);
623
624 Changed |= C;
625 RunUnswitching |= U;
626 if (Visitor.WI.WidestNativeType) {
627 WideIVs.push_back(Visitor.WI);
628 }
629 } while(!LoopPhis.empty());
630
631 // Continue if we disallowed widening.
632 if (!WidenIndVars)
633 continue;
634
635 for (; !WideIVs.empty(); WideIVs.pop_back()) {
636 unsigned ElimExt;
637 unsigned Widened;
638 if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
639 DT, DeadInsts, ElimExt, Widened,
640 HasGuards, UsePostIncrementRanges)) {
641 NumElimExt += ElimExt;
642 NumWidened += Widened;
643 Changed = true;
644 LoopPhis.push_back(WidePhi);
645 }
646 }
647 }
648 return Changed;
649}
650
651//===----------------------------------------------------------------------===//
652// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
653//===----------------------------------------------------------------------===//
654
655/// Given an Value which is hoped to be part of an add recurance in the given
656/// loop, return the associated Phi node if so. Otherwise, return null. Note
657/// that this is less general than SCEVs AddRec checking.
659 Instruction *IncI = dyn_cast<Instruction>(IncV);
660 if (!IncI)
661 return nullptr;
662
663 switch (IncI->getOpcode()) {
664 case Instruction::Add:
665 case Instruction::Sub:
666 break;
667 case Instruction::GetElementPtr:
668 // An IV counter must preserve its type.
669 if (IncI->getNumOperands() == 2)
670 break;
671 [[fallthrough]];
672 default:
673 return nullptr;
674 }
675
676 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
677 if (Phi && Phi->getParent() == L->getHeader()) {
678 if (L->isLoopInvariant(IncI->getOperand(1)))
679 return Phi;
680 return nullptr;
681 }
682 if (IncI->getOpcode() == Instruction::GetElementPtr)
683 return nullptr;
684
685 // Allow add/sub to be commuted.
686 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
687 if (Phi && Phi->getParent() == L->getHeader()) {
688 if (L->isLoopInvariant(IncI->getOperand(0)))
689 return Phi;
690 }
691 return nullptr;
692}
693
694/// Whether the current loop exit test is based on this value. Currently this
695/// is limited to a direct use in the loop condition.
696static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
697 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
698 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
699 // TODO: Allow non-icmp loop test.
700 if (!ICmp)
701 return false;
702
703 // TODO: Allow indirect use.
704 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
705}
706
707/// linearFunctionTestReplace policy. Return true unless we can show that the
708/// current exit test is already sufficiently canonical.
709static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
710 assert(L->getLoopLatch() && "Must be in simplified form");
711
712 // Avoid converting a constant or loop invariant test back to a runtime
713 // test. This is critical for when SCEV's cached ExitCount is less precise
714 // than the current IR (such as after we've proven a particular exit is
715 // actually dead and thus the BE count never reaches our ExitCount.)
716 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
717 if (L->isLoopInvariant(BI->getCondition()))
718 return false;
719
720 // Do LFTR to simplify the exit condition to an ICMP.
721 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
722 if (!Cond)
723 return true;
724
725 // Do LFTR to simplify the exit ICMP to EQ/NE
726 ICmpInst::Predicate Pred = Cond->getPredicate();
727 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
728 return true;
729
730 // Look for a loop invariant RHS
731 Value *LHS = Cond->getOperand(0);
732 Value *RHS = Cond->getOperand(1);
733 if (!L->isLoopInvariant(RHS)) {
734 if (!L->isLoopInvariant(LHS))
735 return true;
736 std::swap(LHS, RHS);
737 }
738 // Look for a simple IV counter LHS
739 PHINode *Phi = dyn_cast<PHINode>(LHS);
740 if (!Phi)
741 Phi = getLoopPhiForCounter(LHS, L);
742
743 if (!Phi)
744 return true;
745
746 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
747 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
748 if (Idx < 0)
749 return true;
750
751 // Do LFTR if the exit condition's IV is *not* a simple counter.
752 Value *IncV = Phi->getIncomingValue(Idx);
753 return Phi != getLoopPhiForCounter(IncV, L);
754}
755
756/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
757/// down to checking that all operands are constant and listing instructions
758/// that may hide undef.
760 unsigned Depth) {
761 if (isa<Constant>(V))
762 return !isa<UndefValue>(V);
763
764 if (Depth >= 6)
765 return false;
766
767 // Conservatively handle non-constant non-instructions. For example, Arguments
768 // may be undef.
769 Instruction *I = dyn_cast<Instruction>(V);
770 if (!I)
771 return false;
772
773 // Load and return values may be undef.
774 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
775 return false;
776
777 // Optimistically handle other instructions.
778 for (Value *Op : I->operands()) {
779 if (!Visited.insert(Op).second)
780 continue;
781 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
782 return false;
783 }
784 return true;
785}
786
787/// Return true if the given value is concrete. We must prove that undef can
788/// never reach it.
789///
790/// TODO: If we decide that this is a good approach to checking for undef, we
791/// may factor it into a common location.
792static bool hasConcreteDef(Value *V) {
794 Visited.insert(V);
795 return hasConcreteDefImpl(V, Visited, 0);
796}
797
798/// Return true if the given phi is a "counter" in L. A counter is an
799/// add recurance (of integer or pointer type) with an arbitrary start, and a
800/// step of 1. Note that L must have exactly one latch.
801static bool isLoopCounter(PHINode* Phi, Loop *L,
802 ScalarEvolution *SE) {
803 assert(Phi->getParent() == L->getHeader());
804 assert(L->getLoopLatch());
805
806 if (!SE->isSCEVable(Phi->getType()))
807 return false;
808
809 const SCEV *S = SE->getSCEV(Phi);
811 return false;
812
813 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
814 Value *IncV = Phi->getIncomingValue(LatchIdx);
815 return (getLoopPhiForCounter(IncV, L) == Phi &&
816 isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
817}
818
819/// Search the loop header for a loop counter (anadd rec w/step of one)
820/// suitable for use by LFTR. If multiple counters are available, select the
821/// "best" one based profitable heuristics.
822///
823/// BECount may be an i8* pointer type. The pointer difference is already
824/// valid count without scaling the address stride, so it remains a pointer
825/// expression as far as SCEV is concerned.
826static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
827 const SCEV *BECount,
829 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
830
831 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
832
833 // Loop over all of the PHI nodes, looking for a simple counter.
834 PHINode *BestPhi = nullptr;
835 const SCEV *BestInit = nullptr;
836 BasicBlock *LatchBlock = L->getLoopLatch();
837 assert(LatchBlock && "Must be in simplified form");
838 const DataLayout &DL = L->getHeader()->getDataLayout();
839
840 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
841 PHINode *Phi = cast<PHINode>(I);
842 if (!isLoopCounter(Phi, L, SE))
843 continue;
844
845 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
846
847 // AR may be a pointer type, while BECount is an integer type.
848 // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
849 // AR may not be a narrower type, or we may never exit.
850 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
851 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
852 continue;
853
854 // Avoid reusing a potentially undef value to compute other values that may
855 // have originally had a concrete definition.
856 if (!hasConcreteDef(Phi)) {
857 // We explicitly allow unknown phis as long as they are already used by
858 // the loop exit test. This is legal since performing LFTR could not
859 // increase the number of undef users.
860 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
861 if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
862 !isLoopExitTestBasedOn(IncPhi, ExitingBB))
863 continue;
864 }
865
866 // Avoid introducing undefined behavior due to poison which didn't exist in
867 // the original program. (Annoyingly, the rules for poison and undef
868 // propagation are distinct, so this does NOT cover the undef case above.)
869 // We have to ensure that we don't introduce UB by introducing a use on an
870 // iteration where said IV produces poison. Our strategy here differs for
871 // pointers and integer IVs. For integers, we strip and reinfer as needed,
872 // see code in linearFunctionTestReplace. For pointers, we restrict
873 // transforms as there is no good way to reinfer inbounds once lost.
874 if (!Phi->getType()->isIntegerTy() &&
875 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
876 continue;
877
878 const SCEV *Init = AR->getStart();
879
880 if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
881 // Don't force a live loop counter if another IV can be used.
882 if (isAlmostDeadIV(Phi, LatchBlock, Cond))
883 continue;
884
885 // Prefer to count-from-zero. This is a more "canonical" counter form. It
886 // also prefers integer to pointer IVs.
887 if (BestInit->isZero() != Init->isZero()) {
888 if (BestInit->isZero())
889 continue;
890 }
891 // If two IVs both count from zero or both count from nonzero then the
892 // narrower is likely a dead phi that has been widened. Use the wider phi
893 // to allow the other to be eliminated.
894 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
895 continue;
896 }
897 BestPhi = Phi;
898 BestInit = Init;
899 }
900 return BestPhi;
901}
902
903/// Insert an IR expression which computes the value held by the IV IndVar
904/// (which must be an loop counter w/unit stride) after the backedge of loop L
905/// is taken ExitCount times.
906static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
907 const SCEV *ExitCount, bool UsePostInc, Loop *L,
908 SCEVExpander &Rewriter, ScalarEvolution *SE) {
909 assert(isLoopCounter(IndVar, L, SE));
910 assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
911 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
912 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
913
914 // For integer IVs, truncate the IV before computing the limit unless we
915 // know apriori that the limit must be a constant when evaluated in the
916 // bitwidth of the IV. We prefer (potentially) keeping a truncate of the
917 // IV in the loop over a (potentially) expensive expansion of the widened
918 // exit count add(zext(add)) expression.
919 if (IndVar->getType()->isIntegerTy() &&
920 SE->getTypeSizeInBits(AR->getType()) >
921 SE->getTypeSizeInBits(ExitCount->getType())) {
922 const SCEV *IVInit = AR->getStart();
923 if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
924 AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
925 }
926
927 const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
928 const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
929 assert(SE->isLoopInvariant(IVLimit, L) &&
930 "Computed iteration count is not loop invariant!");
931 return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
932 ExitingBB->getTerminator());
933}
934
935/// This method rewrites the exit condition of the loop to be a canonical !=
936/// comparison against the incremented loop induction variable. This pass is
937/// able to rewrite the exit tests of any loop where the SCEV analysis can
938/// determine a loop-invariant trip count of the loop, which is actually a much
939/// broader range than just linear tests.
940bool IndVarSimplify::
941linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
942 const SCEV *ExitCount,
943 PHINode *IndVar, SCEVExpander &Rewriter) {
944 assert(L->getLoopLatch() && "Loop no longer in simplified form?");
945 assert(isLoopCounter(IndVar, L, SE));
946 Instruction * const IncVar =
947 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
948
949 // Initialize CmpIndVar to the preincremented IV.
950 Value *CmpIndVar = IndVar;
951 bool UsePostInc = false;
952
953 // If the exiting block is the same as the backedge block, we prefer to
954 // compare against the post-incremented value, otherwise we must compare
955 // against the preincremented value.
956 if (ExitingBB == L->getLoopLatch()) {
957 // For pointer IVs, we chose to not strip inbounds which requires us not
958 // to add a potentially UB introducing use. We need to either a) show
959 // the loop test we're modifying is already in post-inc form, or b) show
960 // that adding a use must not introduce UB.
961 bool SafeToPostInc =
962 IndVar->getType()->isIntegerTy() ||
963 isLoopExitTestBasedOn(IncVar, ExitingBB) ||
964 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
965 if (SafeToPostInc) {
966 UsePostInc = true;
967 CmpIndVar = IncVar;
968 }
969 }
970
971 // It may be necessary to drop nowrap flags on the incrementing instruction
972 // if either LFTR moves from a pre-inc check to a post-inc check (in which
973 // case the increment might have previously been poison on the last iteration
974 // only) or if LFTR switches to a different IV that was previously dynamically
975 // dead (and as such may be arbitrarily poison). We remove any nowrap flags
976 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
977 // check), because the pre-inc addrec flags may be adopted from the original
978 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
979 // TODO: This handling is inaccurate for one case: If we switch to a
980 // dynamically dead IV that wraps on the first loop iteration only, which is
981 // not covered by the post-inc addrec. (If the new IV was not dynamically
982 // dead, it could not be poison on the first iteration in the first place.)
983 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
984 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
985 if (BO->hasNoUnsignedWrap())
986 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
987 if (BO->hasNoSignedWrap())
988 BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
989 }
990
991 Value *ExitCnt = genLoopLimit(
992 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
993 assert(ExitCnt->getType()->isPointerTy() ==
994 IndVar->getType()->isPointerTy() &&
995 "genLoopLimit missed a cast");
996
997 // Insert a new icmp_ne or icmp_eq instruction before the branch.
998 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1000 if (L->contains(BI->getSuccessor(0)))
1001 P = ICmpInst::ICMP_NE;
1002 else
1003 P = ICmpInst::ICMP_EQ;
1004
1005 IRBuilder<> Builder(BI);
1006
1007 // The new loop exit condition should reuse the debug location of the
1008 // original loop exit condition.
1009 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
1010 Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1011
1012 // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1013 // avoid the expensive expansion of the limit expression in the wider type,
1014 // emit a truncate to narrow the IV to the ExitCount type. This is safe
1015 // since we know (from the exit count bitwidth), that we can't self-wrap in
1016 // the narrower type.
1017 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
1018 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
1019 if (CmpIndVarSize > ExitCntSize) {
1020 assert(!CmpIndVar->getType()->isPointerTy() &&
1021 !ExitCnt->getType()->isPointerTy());
1022
1023 // Before resorting to actually inserting the truncate, use the same
1024 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1025 // the other side of the comparison instead. We still evaluate the limit
1026 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1027 // a truncate within in.
1028 bool Extended = false;
1029 const SCEV *IV = SE->getSCEV(CmpIndVar);
1030 const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
1031 const SCEV *ZExtTrunc =
1032 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
1033
1034 if (ZExtTrunc == IV) {
1035 Extended = true;
1036 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
1037 "wide.trip.count");
1038 } else {
1039 const SCEV *SExtTrunc =
1040 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
1041 if (SExtTrunc == IV) {
1042 Extended = true;
1043 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
1044 "wide.trip.count");
1045 }
1046 }
1047
1048 if (Extended) {
1049 bool Discard;
1050 L->makeLoopInvariant(ExitCnt, Discard);
1051 } else
1052 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
1053 "lftr.wideiv");
1054 }
1055 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1056 << " LHS:" << *CmpIndVar << '\n'
1057 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1058 << "\n"
1059 << " RHS:\t" << *ExitCnt << "\n"
1060 << "ExitCount:\t" << *ExitCount << "\n"
1061 << " was: " << *BI->getCondition() << "\n");
1062
1063 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1064 Value *OrigCond = BI->getCondition();
1065 // It's tempting to use replaceAllUsesWith here to fully replace the old
1066 // comparison, but that's not immediately safe, since users of the old
1067 // comparison may not be dominated by the new comparison. Instead, just
1068 // update the branch to use the new comparison; in the common case this
1069 // will make old comparison dead.
1070 BI->setCondition(Cond);
1071 DeadInsts.emplace_back(OrigCond);
1072
1073 ++NumLFTR;
1074 return true;
1075}
1076
1077//===----------------------------------------------------------------------===//
1078// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1079//===----------------------------------------------------------------------===//
1080
1081/// If there's a single exit block, sink any loop-invariant values that
1082/// were defined in the preheader but not used inside the loop into the
1083/// exit block to reduce register pressure in the loop.
1084bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1085 BasicBlock *ExitBlock = L->getExitBlock();
1086 if (!ExitBlock) return false;
1087
1088 BasicBlock *Preheader = L->getLoopPreheader();
1089 if (!Preheader) return false;
1090
1091 bool MadeAnyChanges = false;
1093
1094 // Skip BB Terminator.
1095 if (Preheader->getTerminator() == &I)
1096 continue;
1097
1098 // New instructions were inserted at the end of the preheader.
1099 if (isa<PHINode>(I))
1100 break;
1101
1102 // Don't move instructions which might have side effects, since the side
1103 // effects need to complete before instructions inside the loop. Also don't
1104 // move instructions which might read memory, since the loop may modify
1105 // memory. Note that it's okay if the instruction might have undefined
1106 // behavior: LoopSimplify guarantees that the preheader dominates the exit
1107 // block.
1108 if (I.mayHaveSideEffects() || I.mayReadFromMemory())
1109 continue;
1110
1111 // Skip debug or pseudo instructions.
1112 if (I.isDebugOrPseudoInst())
1113 continue;
1114
1115 // Skip eh pad instructions.
1116 if (I.isEHPad())
1117 continue;
1118
1119 // Don't sink alloca: we never want to sink static alloca's out of the
1120 // entry block, and correctly sinking dynamic alloca's requires
1121 // checks for stacksave/stackrestore intrinsics.
1122 // FIXME: Refactor this check somehow?
1123 if (isa<AllocaInst>(&I))
1124 continue;
1125
1126 // Determine if there is a use in or before the loop (direct or
1127 // otherwise).
1128 bool UsedInLoop = false;
1129 for (Use &U : I.uses()) {
1130 Instruction *User = cast<Instruction>(U.getUser());
1131 BasicBlock *UseBB = User->getParent();
1132 if (PHINode *P = dyn_cast<PHINode>(User)) {
1133 unsigned i =
1135 UseBB = P->getIncomingBlock(i);
1136 }
1137 if (UseBB == Preheader || L->contains(UseBB)) {
1138 UsedInLoop = true;
1139 break;
1140 }
1141 }
1142
1143 // If there is, the def must remain in the preheader.
1144 if (UsedInLoop)
1145 continue;
1146
1147 // Otherwise, sink it to the exit block.
1148 I.moveBefore(ExitBlock->getFirstInsertionPt());
1149 SE->forgetValue(&I);
1150 MadeAnyChanges = true;
1151 }
1152
1153 return MadeAnyChanges;
1154}
1155
1156static void replaceExitCond(BranchInst *BI, Value *NewCond,
1158 auto *OldCond = BI->getCondition();
1159 LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1160 << " with " << *NewCond << "\n");
1161 BI->setCondition(NewCond);
1162 if (OldCond->use_empty())
1163 DeadInsts.emplace_back(OldCond);
1164}
1165
1166static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1167 bool IsTaken) {
1168 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1169 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1170 auto *OldCond = BI->getCondition();
1171 return ConstantInt::get(OldCond->getType(),
1172 IsTaken ? ExitIfTrue : !ExitIfTrue);
1173}
1174
1175static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1177 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1178 auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1179 replaceExitCond(BI, NewCond, DeadInsts);
1180}
1181
1183 LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1184 ScalarEvolution &SE) {
1185 assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1186 auto *LoopPreheader = L->getLoopPreheader();
1187 auto *LoopHeader = L->getHeader();
1189 for (auto &PN : LoopHeader->phis()) {
1190 auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1191 for (User *U : PN.users())
1192 Worklist.push_back(cast<Instruction>(U));
1193 SE.forgetValue(&PN);
1194 PN.replaceAllUsesWith(PreheaderIncoming);
1195 DeadInsts.emplace_back(&PN);
1196 }
1197
1198 // Replacing with the preheader value will often allow IV users to simplify
1199 // (especially if the preheader value is a constant).
1201 while (!Worklist.empty()) {
1202 auto *I = cast<Instruction>(Worklist.pop_back_val());
1203 if (!Visited.insert(I).second)
1204 continue;
1205
1206 // Don't simplify instructions outside the loop.
1207 if (!L->contains(I))
1208 continue;
1209
1210 Value *Res = simplifyInstruction(I, I->getDataLayout());
1211 if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1212 for (User *U : I->users())
1213 Worklist.push_back(cast<Instruction>(U));
1214 I->replaceAllUsesWith(Res);
1215 DeadInsts.emplace_back(I);
1216 }
1217 }
1218}
1219
1220static Value *
1223 SCEVExpander &Rewriter) {
1224 ICmpInst::Predicate InvariantPred = LIP.Pred;
1225 BasicBlock *Preheader = L->getLoopPreheader();
1226 assert(Preheader && "Preheader doesn't exist");
1227 Rewriter.setInsertPoint(Preheader->getTerminator());
1228 auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1229 auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1230 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1231 if (ExitIfTrue)
1232 InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1233 IRBuilder<> Builder(Preheader->getTerminator());
1234 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1235 return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1236 BI->getCondition()->getName());
1237}
1238
1239static std::optional<Value *>
1240createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
1241 const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1242 ScalarEvolution *SE, SCEVExpander &Rewriter) {
1243 CmpPredicate Pred = ICmp->getCmpPredicate();
1244 Value *LHS = ICmp->getOperand(0);
1245 Value *RHS = ICmp->getOperand(1);
1246
1247 // 'LHS pred RHS' should now mean that we stay in loop.
1248 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1249 if (Inverted)
1251
1252 const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1253 const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1254 // Can we prove it to be trivially true or false?
1255 if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
1256 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);
1257
1258 auto *ARTy = LHSS->getType();
1259 auto *MaxIterTy = MaxIter->getType();
1260 // If possible, adjust types.
1261 if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1262 MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1263 else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1264 const SCEV *MinusOne = SE->getMinusOne(ARTy);
1265 const SCEV *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1266 if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1267 MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1268 }
1269
1270 if (SkipLastIter) {
1271 // Semantically skip last iter is "subtract 1, do not bother about unsigned
1272 // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1273 // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1274 // So we manually construct umin(a - 1, b - 1).
1276 if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
1277 for (const SCEV *Op : UMin->operands())
1278 Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
1279 MaxIter = SE->getUMinFromMismatchedTypes(Elements);
1280 } else
1281 MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
1282 }
1283
1284 // Check if there is a loop-invariant predicate equivalent to our check.
1285 auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1286 L, BI, MaxIter);
1287 if (!LIP)
1288 return std::nullopt;
1289
1290 // Can we prove it to be trivially true?
1291 if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1292 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
1293 else
1294 return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
1295}
1296
1298 const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
1299 bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1301 assert(
1302 (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
1303 "Not a loop exit!");
1304
1305 // For branch that stays in loop by TRUE condition, go through AND. For branch
1306 // that stays in loop by FALSE condition, go through OR. Both gives the
1307 // similar logic: "stay in loop iff all conditions are true(false)".
1308 bool Inverted = L->contains(BI->getSuccessor(1));
1309 SmallVector<ICmpInst *, 4> LeafConditions;
1310 SmallVector<Value *, 4> Worklist;
1312 Value *OldCond = BI->getCondition();
1313 Visited.insert(OldCond);
1314 Worklist.push_back(OldCond);
1315
1316 auto GoThrough = [&](Value *V) {
1317 Value *LHS = nullptr, *RHS = nullptr;
1318 if (Inverted) {
1319 if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1320 return false;
1321 } else {
1322 if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1323 return false;
1324 }
1325 if (Visited.insert(LHS).second)
1326 Worklist.push_back(LHS);
1327 if (Visited.insert(RHS).second)
1328 Worklist.push_back(RHS);
1329 return true;
1330 };
1331
1332 do {
1333 Value *Curr = Worklist.pop_back_val();
1334 // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1335 // those with one use, to avoid instruction duplication.
1336 if (Curr->hasOneUse())
1337 if (!GoThrough(Curr))
1338 if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
1339 LeafConditions.push_back(ICmp);
1340 } while (!Worklist.empty());
1341
1342 // If the current basic block has the same exit count as the whole loop, and
1343 // it consists of multiple icmp's, try to collect all icmp's that give exact
1344 // same exit count. For all other icmp's, we could use one less iteration,
1345 // because their value on the last iteration doesn't really matter.
1346 SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
1347 if (!SkipLastIter && LeafConditions.size() > 1 &&
1348 SE->getExitCount(L, ExitingBB,
1349 ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
1350 MaxIter)
1351 for (auto *ICmp : LeafConditions) {
1352 auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
1353 /*ControlsExit*/ false);
1354 const SCEV *ExitMax = EL.SymbolicMaxNotTaken;
1355 if (isa<SCEVCouldNotCompute>(ExitMax))
1356 continue;
1357 // They could be of different types (specifically this happens after
1358 // IV widening).
1359 auto *WiderType =
1360 SE->getWiderType(ExitMax->getType(), MaxIter->getType());
1361 const SCEV *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
1362 const SCEV *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
1363 if (WideExitMax == WideMaxIter)
1364 ICmpsFailingOnLastIter.insert(ICmp);
1365 }
1366
1367 bool Changed = false;
1368 for (auto *OldCond : LeafConditions) {
1369 // Skip last iteration for this icmp under one of two conditions:
1370 // - We do it for all conditions;
1371 // - There is another ICmp that would fail on last iter, so this one doesn't
1372 // really matter.
1373 bool OptimisticSkipLastIter = SkipLastIter;
1374 if (!OptimisticSkipLastIter) {
1375 if (ICmpsFailingOnLastIter.size() > 1)
1376 OptimisticSkipLastIter = true;
1377 else if (ICmpsFailingOnLastIter.size() == 1)
1378 OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
1379 }
1380 if (auto Replaced =
1381 createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
1382 OptimisticSkipLastIter, SE, Rewriter)) {
1383 Changed = true;
1384 auto *NewCond = *Replaced;
1385 if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
1386 NCI->setName(OldCond->getName() + ".first_iter");
1387 }
1388 LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1389 << " with " << *NewCond << "\n");
1390 assert(OldCond->hasOneUse() && "Must be!");
1391 OldCond->replaceAllUsesWith(NewCond);
1392 DeadInsts.push_back(OldCond);
1393 // Make sure we no longer consider this condition as failing on last
1394 // iteration.
1395 ICmpsFailingOnLastIter.erase(OldCond);
1396 }
1397 }
1398 return Changed;
1399}
1400
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags. The alternative to this would be to extending the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers. The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  // Pass 1: turn a signed compare of a zext into its unsigned equivalent when
  // the loop-invariant side provably fits in the zero-extended value range.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning cache with sub-optimal results. For the must-execute case,
    // this is a neccessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    // The zext maps the inner type onto [0, 2^InnerBitWidth) of the outer
    // type; if RHS's unsigned range is contained in that image, signed and
    // unsigned comparisons agree.
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed. We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions. The exception is when we have an zext(add-rec). The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant. This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS = CastInst::Create(
          Instruction::Trunc, RHS, LHSOp->getType(), "",
          L->getLoopPreheader()->getTerminator()->getIterator());
      // NewRHS is an operation that has been hoisted out of the loop, and
      // therefore should have a dropped location.
      NewRHS->setDebugLoc(DebugLoc::getDropped());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      // Samesign flag cannot be preserved after narrowing the compare.
      ICmp->setSameSign(false);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };

    // Same range check as pass 1: the rotate is only valid if RHS is provably
    // within the zext image, i.e. zext(trunc(RHS)) == RHS.
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}
1536
1537bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1538 SmallVector<BasicBlock*, 16> ExitingBlocks;
1539 L->getExitingBlocks(ExitingBlocks);
1540
1541 // Remove all exits which aren't both rewriteable and execute on every
1542 // iteration.
1543 llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1544 // If our exitting block exits multiple loops, we can only rewrite the
1545 // innermost one. Otherwise, we're changing how many times the innermost
1546 // loop runs before it exits.
1547 if (LI->getLoopFor(ExitingBB) != L)
1548 return true;
1549
1550 // Can't rewrite non-branch yet.
1551 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1552 if (!BI)
1553 return true;
1554
1555 // Likewise, the loop latch must be dominated by the exiting BB.
1556 if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1557 return true;
1558
1559 if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1560 // If already constant, nothing to do. However, if this is an
1561 // unconditional exit, we can still replace header phis with their
1562 // preheader value.
1563 if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1564 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1565 return true;
1566 }
1567
1568 return false;
1569 });
1570
1571 if (ExitingBlocks.empty())
1572 return false;
1573
1574 // Get a symbolic upper bound on the loop backedge taken count.
1575 const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1576 if (isa<SCEVCouldNotCompute>(MaxBECount))
1577 return false;
1578
1579 // Visit our exit blocks in order of dominance. We know from the fact that
1580 // all exits must dominate the latch, so there is a total dominance order
1581 // between them.
1582 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1583 // std::sort sorts in ascending order, so we want the inverse of
1584 // the normal dominance relation.
1585 if (A == B) return false;
1586 if (DT->properlyDominates(A, B))
1587 return true;
1588 else {
1589 assert(DT->properlyDominates(B, A) &&
1590 "expected total dominance order!");
1591 return false;
1592 }
1593 });
1594#ifdef ASSERT
1595 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1596 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1597 }
1598#endif
1599
1600 bool Changed = false;
1601 bool SkipLastIter = false;
1602 const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1603 auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1604 if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1605 return;
1606 if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1607 CurrMaxExit = MaxExitCount;
1608 else
1609 CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1610 // If the loop has more than 1 iteration, all further checks will be
1611 // executed 1 iteration less.
1612 if (CurrMaxExit == MaxBECount)
1613 SkipLastIter = true;
1614 };
1615 SmallPtrSet<const SCEV *, 8> DominatingExactExitCounts;
1616 for (BasicBlock *ExitingBB : ExitingBlocks) {
1617 const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1618 const SCEV *MaxExitCount = SE->getExitCount(
1619 L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1620 if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1621 // Okay, we do not know the exit count here. Can we at least prove that it
1622 // will remain the same within iteration space?
1623 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1624 auto OptimizeCond = [&](bool SkipLastIter) {
1625 return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1626 MaxBECount, SkipLastIter,
1627 SE, Rewriter, DeadInsts);
1628 };
1629
1630 // TODO: We might have proved that we can skip the last iteration for
1631 // this check. In this case, we only want to check the condition on the
1632 // pre-last iteration (MaxBECount - 1). However, there is a nasty
1633 // corner case:
1634 //
1635 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1636 //
1637 // If we could not prove that len != 0, then we also could not prove that
1638 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1639 // OptimizeCond will likely not prove anything for it, even if it could
1640 // prove the same fact for len.
1641 //
1642 // As a temporary solution, we query both last and pre-last iterations in
1643 // hope that we will be able to prove triviality for at least one of
1644 // them. We can stop querying MaxBECount for this case once SCEV
1645 // understands that (MaxBECount - 1) will not overflow here.
1646 if (OptimizeCond(false))
1647 Changed = true;
1648 else if (SkipLastIter && OptimizeCond(true))
1649 Changed = true;
1650 UpdateSkipLastIter(MaxExitCount);
1651 continue;
1652 }
1653
1654 UpdateSkipLastIter(ExactExitCount);
1655
1656 // If we know we'd exit on the first iteration, rewrite the exit to
1657 // reflect this. This does not imply the loop must exit through this
1658 // exit; there may be an earlier one taken on the first iteration.
1659 // We know that the backedge can't be taken, so we replace all
1660 // the header PHIs with values coming from the preheader.
1661 if (ExactExitCount->isZero()) {
1662 foldExit(L, ExitingBB, true, DeadInsts);
1663 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1664 Changed = true;
1665 continue;
1666 }
1667
1668 assert(ExactExitCount->getType()->isIntegerTy() &&
1669 MaxBECount->getType()->isIntegerTy() &&
1670 "Exit counts must be integers");
1671
1672 Type *WiderType =
1673 SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1674 ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1675 MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1676 assert(MaxBECount->getType() == ExactExitCount->getType());
1677
1678 // Can we prove that some other exit must be taken strictly before this
1679 // one?
1680 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1681 ExactExitCount)) {
1682 foldExit(L, ExitingBB, false, DeadInsts);
1683 Changed = true;
1684 continue;
1685 }
1686
1687 // As we run, keep track of which exit counts we've encountered. If we
1688 // find a duplicate, we've found an exit which would have exited on the
1689 // exiting iteration, but (from the visit order) strictly follows another
1690 // which does the same and is thus dead.
1691 if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1692 foldExit(L, ExitingBB, false, DeadInsts);
1693 Changed = true;
1694 continue;
1695 }
1696
1697 // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1698 // here. If we kept track of the min of dominanting exits so far, we could
1699 // discharge exits with EC >= MDEC. This is less powerful than the existing
1700 // transform (since later exits aren't considered), but potentially more
1701 // powerful for any case where SCEV can prove a >=u b, but neither a == b
1702 // or a >u b. Such a case is not currently known.
1703 }
1704 return Changed;
1705}
1706
/// Rewrite the exit conditions of a read-only loop into loop-invariant form.
/// For each predicatable exit, the original (loop-varying) exit test is
/// replaced by a comparison of that exit's count against the loop's exact
/// backedge-taken count, computed in the preheader. This may change *which*
/// exit is taken, but not how many iterations run or what the loop computes
/// (legal only because the loop body is side-effect free — checked below).
/// Returns true if any exit branch condition was rewritten.
1707 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1708 SmallVector<BasicBlock*, 16> ExitingBlocks;
1709 L->getExitingBlocks(ExitingBlocks);
1710
1711 // Finally, see if we can rewrite our exit conditions into a loop invariant
1712 // form. If we have a read-only loop, and we can tell that we must exit down
1713 // a path which does not need any of the values computed within the loop, we
1714 // can rewrite the loop to exit on the first iteration. Note that this
1715 // doesn't either a) tell us the loop exits on the first iteration (unless
1716 // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1717 // This transformation looks a lot like a restricted form of dead loop
1718 // elimination, but restricted to read-only loops and without necessarily
1719 // needing to kill the loop entirely.
1720 if (!LoopPredication)
1721 return false;
1722
1723 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1724 // through *explicit* control flow. We have to eliminate the possibility of
1725 // implicit exits (see below) before we know it's truly exact.
1726 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1727 if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1728 return false;
1729
1730 assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1731 assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1732
// Predicate returning true when ExitingBB can NOT be predicated. Checked in
// dominance order below, so the first bad exit truncates the candidate list.
1733 auto BadExit = [&](BasicBlock *ExitingBB) {
1734 // If our exiting block exits multiple loops, we can only rewrite the
1735 // innermost one. Otherwise, we're changing how many times the innermost
1736 // loop runs before it exits.
1737 if (LI->getLoopFor(ExitingBB) != L)
1738 return true;
1739
1740 // Can't rewrite non-branch yet.
1741 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1742 if (!BI)
1743 return true;
1744
1745 // If already constant, nothing to do.
1746 if (isa<Constant>(BI->getCondition()))
1747 return true;
1748
1749 // If the exit block has phis, we need to be able to compute the values
1750 // within the loop which contains them. This assumes trivially lcssa phis
1751 // have already been removed; TODO: generalize
1752 BasicBlock *ExitBlock =
1753 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1754 if (!ExitBlock->phis().empty())
1755 return true;
1756
1757 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1758 if (isa<SCEVCouldNotCompute>(ExitCount) ||
1759 !Rewriter.isSafeToExpand(ExitCount))
1760 return true;
1761
1762 assert(SE->isLoopInvariant(ExitCount, L) &&
1763 "Exit count must be loop invariant");
1764 assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1765 return false;
1766 };
1767
1768 // Make sure all exits dominate the latch. This means there is a linear chain
1769 // of exits. We check this before sorting so we have a total order.
1770 BasicBlock *Latch = L->getLoopLatch();
1771 for (BasicBlock *ExitingBB : ExitingBlocks)
1772 if (!DT->dominates(ExitingBB, Latch))
1773 return false;
1774
1775 // If we have any exits which can't be predicated themselves, then we can't
1776 // predicate any exit which isn't guaranteed to execute before it. Consider
1777 // two exits (a) and (b) which would both exit on the same iteration. If we
1778 // can predicate (b), but not (a), and (a) precedes (b) along some path, then
1779 // we could convert a loop from exiting through (a) to one exiting through
1780 // (b). Note that this problem exists only for exits with the same exit
1781 // count, and we could be more aggressive when exit counts are known inequal.
1782 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1783 // llvm::sort sorts in ascending order, so we want the inverse of
1784 // the normal dominance relation.
1785 if (A == B)
1786 return false;
1787 if (DT->properlyDominates(A, B))
1788 return true;
1789 if (DT->properlyDominates(B, A))
1790 return false;
1791 llvm_unreachable("Should have total dominance order");
1792 });
1793
1794 // Make sure our exit blocks are really a total order (i.e. a linear chain of
1795 // exits before the backedge).
1796 for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1797 assert(DT->dominates(ExitingBlocks[i - 1], ExitingBlocks[i]) &&
1798 "Not sorted by dominance");
1799
1800 // Given our sorted total order, we know that exit[j] must be evaluated
1801 // after all exit[i] such that j > i.
1802 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1803 if (BadExit(ExitingBlocks[i])) {
1804 ExitingBlocks.resize(i);
1805 break;
1806 }
1807
1808 if (ExitingBlocks.empty())
1809 return false;
1810
1811 // At this point, ExitingBlocks consists of only those blocks which are
1812 // predicatable. Given that, we know we have at least one exit we can
1813 // predicate if the loop doesn't have side effects and doesn't have any
1814 // implicit exits (because then our exact BTC isn't actually exact).
1815 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
1816 // suggestions on how to improve this? I can obviously bail out for outer
1817 // loops, but that seems less than ideal. MemorySSA can find memory writes,
1818 // is that enough for *all* side effects?
1819 for (BasicBlock *BB : L->blocks())
1820 for (auto &I : *BB)
1821 // TODO: isGuaranteedToTransfer
1822 if (I.mayHaveSideEffects())
1823 return false;
1824
1825 bool Changed = false;
1826 // Finally, do the actual predication for all predicatable blocks. A couple
1827 // of notes here:
1828 // 1) We don't bother to constant fold dominated exits with identical exit
1829 // counts; that's simply a form of CSE/equality propagation and we leave
1830 // it for dedicated passes.
1831 // 2) We insert the comparison at the branch. Hoisting introduces additional
1832 // legality constraints and we leave that to dedicated logic. We want to
1833 // predicate even if we can't insert a loop invariant expression as
1834 // peeling or unrolling will likely reduce the cost of the otherwise loop
1835 // varying check.
1836 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1837 IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1838 Value *ExactBTCV = nullptr; // Lazily generated if needed.
1839 for (BasicBlock *ExitingBB : ExitingBlocks) {
1840 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1841
1842 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1843 Value *NewCond;
1844 if (ExitCount == ExactBTC) {
// This exit's count equals the BTC, so it is taken on the last iteration;
// fold the branch to always stay in the loop (the latch test handles exit).
1845 NewCond = L->contains(BI->getSuccessor(0)) ?
1846 B.getFalse() : B.getTrue();
1847 } else {
1848 Value *ECV = Rewriter.expandCodeFor(ExitCount);
1849 if (!ExactBTCV)
1850 ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1851 Value *RHS = ExactBTCV;
// Widen via zext so the comparison operands agree; exit counts are
// unsigned quantities.
1852 if (ECV->getType() != RHS->getType()) {
1853 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1854 ECV = B.CreateZExt(ECV, WiderTy);
1855 RHS = B.CreateZExt(RHS, WiderTy);
1856 }
1857 auto Pred = L->contains(BI->getSuccessor(0)) ?
1858 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1859 NewCond = B.CreateICmp(Pred, ECV, RHS);
1860 }
1861 Value *OldCond = BI->getCondition();
1862 BI->setCondition(NewCond);
1863 if (OldCond->use_empty())
1864 DeadInsts.emplace_back(OldCond);
1865 Changed = true;
1866 RunUnswitching = true;
1867 }
1868
1869 return Changed;
1870}
1871
1872//===----------------------------------------------------------------------===//
1873// IndVarSimplify driver. Manage several subpasses of IV simplification.
1874//===----------------------------------------------------------------------===//
1875
/// Main driver: run all induction-variable simplification subpasses on L, in
/// a deliberate order (simplify/widen IVs, rewrite exit values, canonicalize
/// and optimize/predicate exits, then LFTR, then cleanup). Requires LCSSA and
/// LoopSimplify form. Returns true if the IR was changed.
1876 bool IndVarSimplify::run(Loop *L) {
1877 // We need (and expect!) the incoming loop to be in LCSSA.
1878 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1879 "LCSSA required to run indvars!");
1880
1881 // If LoopSimplify form is not available, stay out of trouble. Some notes:
1882 // - LSR currently only supports LoopSimplify-form loops. Indvars'
1883 // canonicalization can be a pessimization without LSR to "clean up"
1884 // afterwards.
1885 // - We depend on having a preheader; in particular,
1886 // Loop::getCanonicalInductionVariable only supports loops with preheaders,
1887 // and we're in trouble if we can't find the induction variable even when
1888 // we've manually inserted one.
1889 // - LFTR relies on having a single backedge.
1890 if (!L->isLoopSimplifyForm())
1891 return false;
1892
1893 bool Changed = false;
1894 // If there are any floating-point recurrences, attempt to
1895 // transform them to use integer recurrences.
1896 Changed |= rewriteNonIntegerIVs(L);
1897
1898 // Create a rewriter object which we'll use to transform the code with.
1899 SCEVExpander Rewriter(*SE, DL, "indvars");
1900#if LLVM_ENABLE_ABI_BREAKING_CHECKS
1901 Rewriter.setDebugType(DEBUG_TYPE);
1902#endif
1903
1904 // Eliminate redundant IV users.
1905 //
1906 // Simplification works best when run before other consumers of SCEV. We
1907 // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1908 // other expressions involving loop IVs have been evaluated. This helps SCEV
1909 // set no-wrap flags before normalizing sign/zero extension.
1910 Rewriter.disableCanonicalMode();
1911 Changed |= simplifyAndExtend(L, Rewriter, LI);
1912
1913 // Check to see if we can compute the final value of any expressions
1914 // that are recurrent in the loop, and substitute the exit values from the
1915 // loop into any instructions outside of the loop that use the final values
1916 // of the current expressions.
1917 if (ReplaceExitValue != NeverRepl) {
1918 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1919 ReplaceExitValue, DeadInsts)) {
1920 NumReplaced += Rewrites;
1921 Changed = true;
1922 }
1923 }
1924
1925 // Eliminate redundant IV cycles.
1926 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
1927
1928 // Try to convert exit conditions to unsigned and rotate computation
1929 // out of the loop. Note: Handles invalidation internally if needed.
1930 Changed |= canonicalizeExitCondition(L);
1931
1932 // Try to eliminate loop exits based on analyzeable exit counts
1933 if (optimizeLoopExits(L, Rewriter)) {
1934 Changed = true;
1935 // Given we've changed exit counts, notify SCEV
1936 // Some nested loops may share same folded exit basic block,
1937 // thus we need to notify top most loop.
1938 SE->forgetTopmostLoop(L);
1939 }
1940
1941 // Try to form loop invariant tests for loop exits by changing how many
1942 // iterations of the loop run when that is unobservable.
1943 if (predicateLoopExits(L, Rewriter)) {
1944 Changed = true;
1945 // Given we've changed exit counts, notify SCEV
1946 SE->forgetLoop(L);
1947 }
1948
1949 // If we have a trip count expression, rewrite the loop's exit condition
1950 // using it.
1951 if (!DisableLFTR) {
1952 BasicBlock *PreHeader = L->getLoopPreheader();
1953
1954 SmallVector<BasicBlock*, 16> ExitingBlocks;
1955 L->getExitingBlocks(ExitingBlocks);
1956 for (BasicBlock *ExitingBB : ExitingBlocks) {
1957 // Can't rewrite non-branch yet.
1958 if (!isa<BranchInst>(ExitingBB->getTerminator()))
1959 continue;
1960
1961 // If our exiting block exits multiple loops, we can only rewrite the
1962 // innermost one. Otherwise, we're changing how many times the innermost
1963 // loop runs before it exits.
1964 if (LI->getLoopFor(ExitingBB) != L)
1965 continue;
1966
1967 if (!needsLFTR(L, ExitingBB))
1968 continue;
1969
1970 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1971 if (isa<SCEVCouldNotCompute>(ExitCount))
1972 continue;
1973
1974 // This was handled above, but as we form SCEVs, we can sometimes refine
1975 // existing ones; this allows exit counts to be folded to zero which
1976 // weren't when optimizeLoopExits saw them. Arguably, we should iterate
1977 // until stable to handle cases like this better.
1978 if (ExitCount->isZero())
1979 continue;
1980
1981 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
1982 if (!IndVar)
1983 continue;
1984
1985 // Avoid high cost expansions. Note: This heuristic is questionable in
1986 // that our definition of "high cost" is not exactly principled.
1987 if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
1988 TTI, PreHeader->getTerminator()))
1989 continue;
1990
1991 if (!Rewriter.isSafeToExpand(ExitCount))
1992 continue;
1993
1994 Changed |= linearFunctionTestReplace(L, ExitingBB,
1995 ExitCount, IndVar,
1996 Rewriter);
1997 }
1998 }
1999 // Clear the rewriter cache, because values that are in the rewriter's cache
2000 // can be deleted in the loop below, causing the AssertingVH in the cache to
2001 // trigger.
2002 Rewriter.clear();
2003
2004 // Now that we're done iterating through lists, clean up any instructions
2005 // which are now dead.
2006 while (!DeadInsts.empty()) {
2007 Value *V = DeadInsts.pop_back_val();
2008
2009 if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
2010 Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
2011 else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2012 Changed |=
2013 RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2014 }
2015
2016 // The Rewriter may not be used from this point on.
2017
2018 // Loop-invariant instructions in the preheader that aren't used in the
2019 // loop may be sunk below the loop to reduce register pressure.
2020 Changed |= sinkUnusedInvariants(L);
2021
2022 // rewriteFirstIterationLoopExitValues does not rely on the computation of
2023 // trip count and therefore can further simplify exit values in addition to
2024 // rewriteLoopExitValues.
2025 Changed |= rewriteFirstIterationLoopExitValues(L);
2026
2027 // Clean up dead instructions.
2028 Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2029
2030 // Check a post-condition.
2031 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2032 "Indvars did not preserve LCSSA!");
2033 if (VerifyMemorySSA && MSSAU)
2034 MSSAU->getMemorySSA()->verifyMemorySSA();
2035
2036 return Changed;
2037}
2038
2041 LPMUpdater &) {
2042 Function *F = L.getHeader()->getParent();
2043 const DataLayout &DL = F->getDataLayout();
2044
2045 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2046 WidenIndVars && AllowIVWidening);
2047 if (!IVS.run(&L))
2048 return PreservedAnalyses::all();
2049
2050 auto PA = getLoopPassPreservedAnalyses();
2051 PA.preserveSet<CFGAnalyses>();
2052 if (IVS.runUnswitching()) {
2054 PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2055 }
2056
2057 if (AR.MSSA)
2058 PA.preserve<MemorySSAAnalysis>();
2059 return PA;
2060}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:687
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
static Value * genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, const SCEV *ExitCount, bool UsePostInc, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Insert an IR expression which computes the value held by the IV IndVar (which must be a loop counter...
static void replaceExitCond(BranchInst *BI, Value *NewCond, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB)
Whether the current loop exit test is based on this value.
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
static void replaceLoopPHINodesWithPreheaderValues(LoopInfo *LI, Loop *L, SmallVectorImpl< WeakTrackingVH > &DeadInsts, ScalarEvolution &SE)
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB)
linearFunctionTestReplace policy.
static bool optimizeLoopExitWithUnknownExitCount(const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static Value * createInvariantCond(const Loop *L, BasicBlock *ExitingBB, const ScalarEvolution::LoopInvariantPredicate &LIP, SCEVExpander &Rewriter)
static bool isLoopCounter(PHINode *Phi, Loop *L, ScalarEvolution *SE)
Return true if the given phi is a "counter" in L.
static std::optional< Value * > createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB, const SCEV *MaxIter, bool Inverted, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter)
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value * > &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L)
Given a Value which is hoped to be part of an add recurrence in the given loop, return the associated...
static Constant * createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB, bool IsTaken)
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
#define DEBUG_TYPE
static PHINode * FindLoopCounter(Loop *L, BasicBlock *ExitingBB, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Search the loop header for a loop counter (an add rec w/ step of one) suitable for use by LFTR.
static cl::opt< bool > AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), cl::desc("Allow widening of indvars to eliminate s/zext"))
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
static cl::opt< bool > LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), cl::desc("Predicate conditions in read only loops"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
if(PassOpts->AAPipeline)
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
This pass exposes codegen information to IR-level passes.
Virtual Register Rewriter
Definition: VirtRegMap.cpp:269
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:83
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1332
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:393
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:73
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:448
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:612
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:681
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:707
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:708
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:684
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:693
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:682
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:683
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:705
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:692
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:686
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:689
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:703
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:690
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:685
@ ICMP_EQ
equal
Definition: InstrTypes.h:699
@ ICMP_NE
not equal
Definition: InstrTypes.h:700
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:706
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:694
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:691
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:23
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:277
const APFloat & getValueAPF() const
Definition: Constants.h:320
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:131
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:220
static DebugLoc getDropped()
Definition: DebugLoc.h:164
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:135
This instruction compares its operands according to the predicate given to the constructor.
This instruction compares its operands according to the predicate given to the constructor.
CmpPredicate getCmpPredicate() const
CmpPredicate getInverseCmpPredicate() const
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2439
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
Interface for visiting interesting IV users that are recognized but not simplified by this utility.
virtual void visitCast(CastInst *Cast)=0
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:513
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:312
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:510
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:86
Class to represent integer types.
Definition: DerivedTypes.h:42
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:442
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:936
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:702
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:303
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1885
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
This node represents a polynomial recurrence on the trip count of the specified loop.
LLVM_ABI const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
LLVM_ABI const SCEVAddRecExpr * getPostIncExpr(ScalarEvolution &SE) const
Return an expression representing the value of this expression one iteration of the loop ahead.
This class uses information about analyze scalars to rewrite expressions in canonical form.
This class represents an analyzed expression in the program.
LLVM_ABI bool isOne() const
Return true if the expression is a constant one.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents a cast from signed integer to floating point.
The main scalar evolution driver.
LLVM_ABI std::optional< LoopInvariantPredicate > getLoopInvariantExitCondDuringFirstIterations(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter)
If the result of the predicate LHS Pred RHS is loop invariant with respect to L at given Context duri...
LLVM_ABI Type * getWiderType(Type *Ty1, Type *Ty2) const
LLVM_ABI const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
LLVM_ABI ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates=false)
Compute the number of times the backedge of the specified loop will execute if its exit condition wer...
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI std::optional< bool > evaluatePredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Check whether the condition described by Pred, LHS, and RHS is true or false in the given Context.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
LLVM_ABI void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may affect its v...
LLVM_ABI const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getMinusOne(Type *Ty)
Return a SCEV for the constant -1 of a specific type.
LLVM_ABI const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI const SCEV * getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
Promote the operands to the wider of the types using zero-extension, and then perform a umin operatio...
LLVM_ABI const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
LLVM_ABI bool isKnownPredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
size_type size() const
Definition: SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:418
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:267
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
Value * getOperand(unsigned i) const
Definition: User.h:232
unsigned getNumOperands() const
Definition: User.h:254
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:546
iterator_range< user_iterator > users()
Definition: Value.h:426
bool use_empty() const
Definition: Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1098
user_iterator_impl< User > user_iterator
Definition: Value.h:391
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:396
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:205
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
Definition: Intrinsics.cpp:762
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
cst_pred_ty< is_one > m_scev_One()
Match an integer 1.
specificloop_ty m_SpecificLoop(const Loop *L)
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
class_match< const SCEV > m_SCEV()
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:712
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provably be executed on the path to OnPathTo if Root produced...
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:533
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI cl::opt< unsigned > SCEVCheapExpansionBudget
std::pair< bool, bool > simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
TargetTransformInfo TTI
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:84
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2139
LLVM_ABI bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
Definition: LoopUtils.cpp:471
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
Definition: LoopUtils.cpp:1574
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition: iterator.h:363
LLVM_ABI bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition: Local.cpp:641
@ UnusedIndVarInLoop
Definition: LoopUtils.h:495
@ OnlyCheapRepl
Definition: LoopUtils.h:493
@ NeverRepl
Definition: LoopUtils.h:492
@ NoHardUse
Definition: LoopUtils.h:494
@ AlwaysRepl
Definition: LoopUtils.h:496
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A marker analysis to determine if SimpleLoopUnswitch should run again on a given loop.
Collect information about induction variables that are used by sign/zero extend operations.
PHINode * NarrowIV