LLVM 21.0.0git
IndVarSimplify.cpp
Go to the documentation of this file.
1//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This transformation analyzes and transforms the induction variables (and
10// computations derived from them) into simpler forms suitable for subsequent
11// analysis and transformation.
12//
13// If the trip count of a loop is computable, this pass also makes the following
14// changes:
15// 1. The exit condition for the loop is canonicalized to compare the
16// induction value against the exit value. This turns loops like:
17// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18// 2. Any use outside of the loop of an expression derived from the indvar
19// is changed to compute the derived value outside of the loop, eliminating
20// the dependence on the exit value of the induction variable. If the only
21// purpose of the loop is to compute the exit value of some derived
22// expression, this transformation will make the loop dead.
23//
24//===----------------------------------------------------------------------===//
25
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/Statistic.h"
44#include "llvm/IR/BasicBlock.h"
45#include "llvm/IR/Constant.h"
47#include "llvm/IR/Constants.h"
48#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/Dominators.h"
51#include "llvm/IR/Function.h"
52#include "llvm/IR/IRBuilder.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
57#include "llvm/IR/Intrinsics.h"
58#include "llvm/IR/PassManager.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/Use.h"
62#include "llvm/IR/User.h"
63#include "llvm/IR/Value.h"
64#include "llvm/IR/ValueHandle.h"
67#include "llvm/Support/Debug.h"
76#include <cassert>
77#include <cstdint>
78#include <utility>
79
80using namespace llvm;
81using namespace PatternMatch;
82
83#define DEBUG_TYPE "indvars"
84
85STATISTIC(NumWidened , "Number of indvars widened");
86STATISTIC(NumReplaced , "Number of exit values replaced");
87STATISTIC(NumLFTR , "Number of loop exit tests replaced");
88STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
89STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
90
92 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
93 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
95 clEnumValN(NeverRepl, "never", "never replace exit value"),
97 "only replace exit value when the cost is cheap"),
99 UnusedIndVarInLoop, "unusedindvarinloop",
100 "only replace exit value when it is an unused "
101 "induction variable in the loop and has cheap replacement cost"),
102 clEnumValN(NoHardUse, "noharduse",
103 "only replace exit values when loop def likely dead"),
104 clEnumValN(AlwaysRepl, "always",
105 "always replace exit value whenever possible")));
106
108 "indvars-post-increment-ranges", cl::Hidden,
109 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
110 cl::init(true));
111
112static cl::opt<bool>
113DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
114 cl::desc("Disable Linear Function Test Replace optimization"));
115
116static cl::opt<bool>
117LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
118 cl::desc("Predicate conditions in read only loops"));
119
120static cl::opt<bool>
121AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
122 cl::desc("Allow widening of indvars to eliminate s/zext"));
123
124namespace {
125
126class IndVarSimplify {
127 LoopInfo *LI;
128 ScalarEvolution *SE;
129 DominatorTree *DT;
130 const DataLayout &DL;
133 std::unique_ptr<MemorySSAUpdater> MSSAU;
134
136 bool WidenIndVars;
137
138 bool RunUnswitching = false;
139
140 bool handleFloatingPointIV(Loop *L, PHINode *PH);
141 bool rewriteNonIntegerIVs(Loop *L);
142
143 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
144 /// Try to improve our exit conditions by converting condition from signed
145 /// to unsigned or rotating computation out of the loop.
146 /// (See inline comment about why this is duplicated from simplifyAndExtend)
147 bool canonicalizeExitCondition(Loop *L);
148 /// Try to eliminate loop exits based on analyzeable exit counts
149 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
150 /// Try to form loop invariant tests for loop exits by changing how many
151 /// iterations of the loop run when that is unobservable.
152 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
153
154 bool rewriteFirstIterationLoopExitValues(Loop *L);
155
156 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
157 const SCEV *ExitCount,
158 PHINode *IndVar, SCEVExpander &Rewriter);
159
160 bool sinkUnusedInvariants(Loop *L);
161
162public:
163 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
164 const DataLayout &DL, TargetLibraryInfo *TLI,
165 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
166 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
167 WidenIndVars(WidenIndVars) {
168 if (MSSA)
169 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
170 }
171
172 bool run(Loop *L);
173
174 bool runUnswitching() const { return RunUnswitching; }
175};
176
177} // end anonymous namespace
178
179//===----------------------------------------------------------------------===//
180// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
181//===----------------------------------------------------------------------===//
182
183/// Convert APF to an integer, if possible.
184static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
185 bool isExact = false;
186 // See if we can convert this to an int64_t
187 uint64_t UIntVal;
188 if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
189 APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
190 !isExact)
191 return false;
192 IntVal = UIntVal;
193 return true;
194}
195
196/// If the loop has floating induction variable then insert corresponding
197/// integer induction variable if possible.
198/// For example,
199/// for(double i = 0; i < 10000; ++i)
200/// bar(i)
201/// is converted into
202/// for(int i = 0; i < 10000; ++i)
203/// bar((double)i);
204bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
205 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
206 unsigned BackEdge = IncomingEdge^1;
207
208 // Check incoming value.
209 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
210
211 int64_t InitValue;
212 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
213 return false;
214
215 // Check IV increment. Reject this PN if increment operation is not
216 // an add or increment value can not be represented by an integer.
217 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
218 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
219
220 // If this is not an add of the PHI with a constantfp, or if the constant fp
221 // is not an integer, bail out.
222 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
223 int64_t IncValue;
224 if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
225 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
226 return false;
227
228 // Check Incr uses. One user is PN and the other user is an exit condition
229 // used by the conditional terminator.
230 Value::user_iterator IncrUse = Incr->user_begin();
231 Instruction *U1 = cast<Instruction>(*IncrUse++);
232 if (IncrUse == Incr->user_end()) return false;
233 Instruction *U2 = cast<Instruction>(*IncrUse++);
234 if (IncrUse != Incr->user_end()) return false;
235
236 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
237 // only used by a branch, we can't transform it.
238 FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
239 if (!Compare)
240 Compare = dyn_cast<FCmpInst>(U2);
241 if (!Compare || !Compare->hasOneUse() ||
242 !isa<BranchInst>(Compare->user_back()))
243 return false;
244
245 BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
246
247 // We need to verify that the branch actually controls the iteration count
248 // of the loop. If not, the new IV can overflow and no one will notice.
249 // The branch block must be in the loop and one of the successors must be out
250 // of the loop.
251 assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
252 if (!L->contains(TheBr->getParent()) ||
253 (L->contains(TheBr->getSuccessor(0)) &&
254 L->contains(TheBr->getSuccessor(1))))
255 return false;
256
257 // If it isn't a comparison with an integer-as-fp (the exit value), we can't
258 // transform it.
259 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
260 int64_t ExitValue;
261 if (ExitValueVal == nullptr ||
262 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
263 return false;
264
265 // Find new predicate for integer comparison.
267 switch (Compare->getPredicate()) {
268 default: return false; // Unknown comparison.
270 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
272 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
274 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
276 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
278 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
280 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
281 }
282
283 // We convert the floating point induction variable to a signed i32 value if
284 // we can. This is only safe if the comparison will not overflow in a way
285 // that won't be trapped by the integer equivalent operations. Check for this
286 // now.
287 // TODO: We could use i64 if it is native and the range requires it.
288
289 // The start/stride/exit values must all fit in signed i32.
290 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
291 return false;
292
293 // If not actually striding (add x, 0.0), avoid touching the code.
294 if (IncValue == 0)
295 return false;
296
297 // Positive and negative strides have different safety conditions.
298 if (IncValue > 0) {
299 // If we have a positive stride, we require the init to be less than the
300 // exit value.
301 if (InitValue >= ExitValue)
302 return false;
303
304 uint32_t Range = uint32_t(ExitValue-InitValue);
305 // Check for infinite loop, either:
306 // while (i <= Exit) or until (i > Exit)
307 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
308 if (++Range == 0) return false; // Range overflows.
309 }
310
311 unsigned Leftover = Range % uint32_t(IncValue);
312
313 // If this is an equality comparison, we require that the strided value
314 // exactly land on the exit value, otherwise the IV condition will wrap
315 // around and do things the fp IV wouldn't.
316 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
317 Leftover != 0)
318 return false;
319
320 // If the stride would wrap around the i32 before exiting, we can't
321 // transform the IV.
322 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
323 return false;
324 } else {
325 // If we have a negative stride, we require the init to be greater than the
326 // exit value.
327 if (InitValue <= ExitValue)
328 return false;
329
330 uint32_t Range = uint32_t(InitValue-ExitValue);
331 // Check for infinite loop, either:
332 // while (i >= Exit) or until (i < Exit)
333 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
334 if (++Range == 0) return false; // Range overflows.
335 }
336
337 unsigned Leftover = Range % uint32_t(-IncValue);
338
339 // If this is an equality comparison, we require that the strided value
340 // exactly land on the exit value, otherwise the IV condition will wrap
341 // around and do things the fp IV wouldn't.
342 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
343 Leftover != 0)
344 return false;
345
346 // If the stride would wrap around the i32 before exiting, we can't
347 // transform the IV.
348 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
349 return false;
350 }
351
352 IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
353
354 // Insert new integer induction variable.
355 PHINode *NewPHI =
356 PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
357 NewPHI->addIncoming(ConstantInt::getSigned(Int32Ty, InitValue),
358 PN->getIncomingBlock(IncomingEdge));
359 NewPHI->setDebugLoc(PN->getDebugLoc());
360
361 Instruction *NewAdd = BinaryOperator::CreateAdd(
362 NewPHI, ConstantInt::getSigned(Int32Ty, IncValue),
363 Incr->getName() + ".int", Incr->getIterator());
364 NewAdd->setDebugLoc(Incr->getDebugLoc());
365 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
366
367 ICmpInst *NewCompare = new ICmpInst(
368 TheBr->getIterator(), NewPred, NewAdd,
369 ConstantInt::getSigned(Int32Ty, ExitValue), Compare->getName());
370 NewCompare->setDebugLoc(Compare->getDebugLoc());
371
372 // In the following deletions, PN may become dead and may be deleted.
373 // Use a WeakTrackingVH to observe whether this happens.
374 WeakTrackingVH WeakPH = PN;
375
376 // Delete the old floating point exit comparison. The branch starts using the
377 // new comparison.
378 NewCompare->takeName(Compare);
379 Compare->replaceAllUsesWith(NewCompare);
380 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
381
382 // Delete the old floating point increment.
383 Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
384 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
385
386 // If the FP induction variable still has uses, this is because something else
387 // in the loop uses its value. In order to canonicalize the induction
388 // variable, we chose to eliminate the IV and rewrite it in terms of an
389 // int->fp cast.
390 //
391 // We give preference to sitofp over uitofp because it is faster on most
392 // platforms.
393 if (WeakPH) {
394 Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
395 PN->getParent()->getFirstInsertionPt());
396 Conv->setDebugLoc(PN->getDebugLoc());
397 PN->replaceAllUsesWith(Conv);
398 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
399 }
400 return true;
401}
402
403bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
404 // First step. Check to see if there are any floating-point recurrences.
405 // If there are, change them into integer recurrences, permitting analysis by
406 // the SCEV routines.
407 BasicBlock *Header = L->getHeader();
408
410 for (PHINode &PN : Header->phis())
411 PHIs.push_back(&PN);
412
413 bool Changed = false;
414 for (WeakTrackingVH &PHI : PHIs)
415 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
416 Changed |= handleFloatingPointIV(L, PN);
417
418 // If the loop previously had floating-point IV, ScalarEvolution
419 // may not have been able to compute a trip count. Now that we've done some
420 // re-writing, the trip count may be computable.
421 if (Changed)
422 SE->forgetLoop(L);
423 return Changed;
424}
425
426//===---------------------------------------------------------------------===//
427// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
428// they will exit at the first iteration.
429//===---------------------------------------------------------------------===//
430
431/// Check to see if this loop has loop invariant conditions which lead to loop
432/// exits. If so, we know that if the exit path is taken, it is at the first
433/// loop iteration. This lets us predict exit values of PHI nodes that live in
434/// loop header.
435bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
436 // Verify the input to the pass is already in LCSSA form.
437 assert(L->isLCSSAForm(*DT));
438
440 L->getUniqueExitBlocks(ExitBlocks);
441
442 bool MadeAnyChanges = false;
443 for (auto *ExitBB : ExitBlocks) {
444 // If there are no more PHI nodes in this exit block, then no more
445 // values defined inside the loop are used on this path.
446 for (PHINode &PN : ExitBB->phis()) {
447 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
448 IncomingValIdx != E; ++IncomingValIdx) {
449 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
450
451 // Can we prove that the exit must run on the first iteration if it
452 // runs at all? (i.e. early exits are fine for our purposes, but
453 // traces which lead to this exit being taken on the 2nd iteration
454 // aren't.) Note that this is about whether the exit branch is
455 // executed, not about whether it is taken.
456 if (!L->getLoopLatch() ||
457 !DT->dominates(IncomingBB, L->getLoopLatch()))
458 continue;
459
460 // Get condition that leads to the exit path.
461 auto *TermInst = IncomingBB->getTerminator();
462
463 Value *Cond = nullptr;
464 if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
465 // Must be a conditional branch, otherwise the block
466 // should not be in the loop.
467 Cond = BI->getCondition();
468 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
469 Cond = SI->getCondition();
470 else
471 continue;
472
473 if (!L->isLoopInvariant(Cond))
474 continue;
475
476 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
477
478 // Only deal with PHIs in the loop header.
479 if (!ExitVal || ExitVal->getParent() != L->getHeader())
480 continue;
481
482 // If ExitVal is a PHI on the loop header, then we know its
483 // value along this exit because the exit can only be taken
484 // on the first iteration.
485 auto *LoopPreheader = L->getLoopPreheader();
486 assert(LoopPreheader && "Invalid loop");
487 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
488 if (PreheaderIdx != -1) {
489 assert(ExitVal->getParent() == L->getHeader() &&
490 "ExitVal must be in loop header");
491 MadeAnyChanges = true;
492 PN.setIncomingValue(IncomingValIdx,
493 ExitVal->getIncomingValue(PreheaderIdx));
494 SE->forgetValue(&PN);
495 }
496 }
497 }
498 }
499 return MadeAnyChanges;
500}
501
502//===----------------------------------------------------------------------===//
503// IV Widening - Extend the width of an IV to cover its widest uses.
504//===----------------------------------------------------------------------===//
505
506/// Update information about the induction variable that is extended by this
507/// sign or zero extend operation. This is used to determine the final width of
508/// the IV before actually widening it.
509static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
510 ScalarEvolution *SE,
511 const TargetTransformInfo *TTI) {
512 bool IsSigned = Cast->getOpcode() == Instruction::SExt;
513 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
514 return;
515
516 Type *Ty = Cast->getType();
517 uint64_t Width = SE->getTypeSizeInBits(Ty);
518 if (!Cast->getDataLayout().isLegalInteger(Width))
519 return;
520
521 // Check that `Cast` actually extends the induction variable (we rely on this
522 // later). This takes care of cases where `Cast` is extending a truncation of
523 // the narrow induction variable, and thus can end up being narrower than the
524 // "narrow" induction variable.
525 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
526 if (NarrowIVWidth >= Width)
527 return;
528
529 // Cast is either an sext or zext up to this point.
530 // We should not widen an indvar if arithmetics on the wider indvar are more
531 // expensive than those on the narrower indvar. We check only the cost of ADD
532 // because at least an ADD is required to increment the induction variable. We
533 // could compute more comprehensively the cost of all instructions on the
534 // induction variable when necessary.
535 if (TTI &&
536 TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
537 TTI->getArithmeticInstrCost(Instruction::Add,
538 Cast->getOperand(0)->getType())) {
539 return;
540 }
541
542 if (!WI.WidestNativeType ||
543 Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
545 WI.IsSigned = IsSigned;
546 return;
547 }
548
549 // We extend the IV to satisfy the sign of its user(s), or 'signed'
550 // if there are multiple users with both sign- and zero extensions,
551 // in order not to introduce nondeterministic behaviour based on the
552 // unspecified order of a PHI nodes' users-iterator.
553 WI.IsSigned |= IsSigned;
554}
555
556//===----------------------------------------------------------------------===//
557// Live IV Reduction - Minimize IVs live across the loop.
558//===----------------------------------------------------------------------===//
559
560//===----------------------------------------------------------------------===//
561// Simplification of IV users based on SCEV evaluation.
562//===----------------------------------------------------------------------===//
563
564namespace {
565
566class IndVarSimplifyVisitor : public IVVisitor {
567 ScalarEvolution *SE;
569 PHINode *IVPhi;
570
571public:
572 WideIVInfo WI;
573
574 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
576 const DominatorTree *DTree)
577 : SE(SCEV), TTI(TTI), IVPhi(IV) {
578 DT = DTree;
579 WI.NarrowIV = IVPhi;
580 }
581
582 // Implement the interface used by simplifyUsersOfIV.
583 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
584};
585
586} // end anonymous namespace
587
588/// Iteratively perform simplification on a worklist of IV users. Each
589/// successive simplification may push more users which may themselves be
590/// candidates for simplification.
591///
592/// Sign/Zero extend elimination is interleaved with IV simplification.
593bool IndVarSimplify::simplifyAndExtend(Loop *L,
594 SCEVExpander &Rewriter,
595 LoopInfo *LI) {
597
598 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
599 L->getBlocks()[0]->getModule(), Intrinsic::experimental_guard);
600 bool HasGuards = GuardDecl && !GuardDecl->use_empty();
601
603 for (PHINode &PN : L->getHeader()->phis())
604 LoopPhis.push_back(&PN);
605
606 // Each round of simplification iterates through the SimplifyIVUsers worklist
607 // for all current phis, then determines whether any IVs can be
608 // widened. Widening adds new phis to LoopPhis, inducing another round of
609 // simplification on the wide IVs.
610 bool Changed = false;
611 while (!LoopPhis.empty()) {
612 // Evaluate as many IV expressions as possible before widening any IVs. This
613 // forces SCEV to set no-wrap flags before evaluating sign/zero
614 // extension. The first time SCEV attempts to normalize sign/zero extension,
615 // the result becomes final. So for the most predictable results, we delay
616 // evaluation of sign/zero extend evaluation until needed, and avoid running
617 // other SCEV based analysis prior to simplifyAndExtend.
618 do {
619 PHINode *CurrIV = LoopPhis.pop_back_val();
620
621 // Information about sign/zero extensions of CurrIV.
622 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
623
624 const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
625 Rewriter, &Visitor);
626
627 Changed |= C;
628 RunUnswitching |= U;
629 if (Visitor.WI.WidestNativeType) {
630 WideIVs.push_back(Visitor.WI);
631 }
632 } while(!LoopPhis.empty());
633
634 // Continue if we disallowed widening.
635 if (!WidenIndVars)
636 continue;
637
638 for (; !WideIVs.empty(); WideIVs.pop_back()) {
639 unsigned ElimExt;
640 unsigned Widened;
641 if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
642 DT, DeadInsts, ElimExt, Widened,
643 HasGuards, UsePostIncrementRanges)) {
644 NumElimExt += ElimExt;
645 NumWidened += Widened;
646 Changed = true;
647 LoopPhis.push_back(WidePhi);
648 }
649 }
650 }
651 return Changed;
652}
653
654//===----------------------------------------------------------------------===//
655// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
656//===----------------------------------------------------------------------===//
657
658/// Given an Value which is hoped to be part of an add recurance in the given
659/// loop, return the associated Phi node if so. Otherwise, return null. Note
660/// that this is less general than SCEVs AddRec checking.
662 Instruction *IncI = dyn_cast<Instruction>(IncV);
663 if (!IncI)
664 return nullptr;
665
666 switch (IncI->getOpcode()) {
667 case Instruction::Add:
668 case Instruction::Sub:
669 break;
670 case Instruction::GetElementPtr:
671 // An IV counter must preserve its type.
672 if (IncI->getNumOperands() == 2)
673 break;
674 [[fallthrough]];
675 default:
676 return nullptr;
677 }
678
679 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
680 if (Phi && Phi->getParent() == L->getHeader()) {
681 if (L->isLoopInvariant(IncI->getOperand(1)))
682 return Phi;
683 return nullptr;
684 }
685 if (IncI->getOpcode() == Instruction::GetElementPtr)
686 return nullptr;
687
688 // Allow add/sub to be commuted.
689 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
690 if (Phi && Phi->getParent() == L->getHeader()) {
691 if (L->isLoopInvariant(IncI->getOperand(0)))
692 return Phi;
693 }
694 return nullptr;
695}
696
697/// Whether the current loop exit test is based on this value. Currently this
698/// is limited to a direct use in the loop condition.
699static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
700 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
701 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
702 // TODO: Allow non-icmp loop test.
703 if (!ICmp)
704 return false;
705
706 // TODO: Allow indirect use.
707 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
708}
709
710/// linearFunctionTestReplace policy. Return true unless we can show that the
711/// current exit test is already sufficiently canonical.
712static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
713 assert(L->getLoopLatch() && "Must be in simplified form");
714
715 // Avoid converting a constant or loop invariant test back to a runtime
716 // test. This is critical for when SCEV's cached ExitCount is less precise
717 // than the current IR (such as after we've proven a particular exit is
718 // actually dead and thus the BE count never reaches our ExitCount.)
719 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
720 if (L->isLoopInvariant(BI->getCondition()))
721 return false;
722
723 // Do LFTR to simplify the exit condition to an ICMP.
724 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
725 if (!Cond)
726 return true;
727
728 // Do LFTR to simplify the exit ICMP to EQ/NE
729 ICmpInst::Predicate Pred = Cond->getPredicate();
730 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
731 return true;
732
733 // Look for a loop invariant RHS
734 Value *LHS = Cond->getOperand(0);
735 Value *RHS = Cond->getOperand(1);
736 if (!L->isLoopInvariant(RHS)) {
737 if (!L->isLoopInvariant(LHS))
738 return true;
739 std::swap(LHS, RHS);
740 }
741 // Look for a simple IV counter LHS
742 PHINode *Phi = dyn_cast<PHINode>(LHS);
743 if (!Phi)
744 Phi = getLoopPhiForCounter(LHS, L);
745
746 if (!Phi)
747 return true;
748
749 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
750 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
751 if (Idx < 0)
752 return true;
753
754 // Do LFTR if the exit condition's IV is *not* a simple counter.
755 Value *IncV = Phi->getIncomingValue(Idx);
756 return Phi != getLoopPhiForCounter(IncV, L);
757}
758
759/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
760/// down to checking that all operands are constant and listing instructions
761/// that may hide undef.
763 unsigned Depth) {
764 if (isa<Constant>(V))
765 return !isa<UndefValue>(V);
766
767 if (Depth >= 6)
768 return false;
769
770 // Conservatively handle non-constant non-instructions. For example, Arguments
771 // may be undef.
772 Instruction *I = dyn_cast<Instruction>(V);
773 if (!I)
774 return false;
775
776 // Load and return values may be undef.
777 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
778 return false;
779
780 // Optimistically handle other instructions.
781 for (Value *Op : I->operands()) {
782 if (!Visited.insert(Op).second)
783 continue;
784 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
785 return false;
786 }
787 return true;
788}
789
790/// Return true if the given value is concrete. We must prove that undef can
791/// never reach it.
792///
793/// TODO: If we decide that this is a good approach to checking for undef, we
794/// may factor it into a common location.
795static bool hasConcreteDef(Value *V) {
797 Visited.insert(V);
798 return hasConcreteDefImpl(V, Visited, 0);
799}
800
801/// Return true if the given phi is a "counter" in L. A counter is an
802/// add recurance (of integer or pointer type) with an arbitrary start, and a
803/// step of 1. Note that L must have exactly one latch.
804static bool isLoopCounter(PHINode* Phi, Loop *L,
805 ScalarEvolution *SE) {
806 assert(Phi->getParent() == L->getHeader());
807 assert(L->getLoopLatch());
808
809 if (!SE->isSCEVable(Phi->getType()))
810 return false;
811
812 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
813 if (!AR || AR->getLoop() != L || !AR->isAffine())
814 return false;
815
816 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
817 if (!Step || !Step->isOne())
818 return false;
819
820 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
821 Value *IncV = Phi->getIncomingValue(LatchIdx);
822 return (getLoopPhiForCounter(IncV, L) == Phi &&
823 isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
824}
825
826/// Search the loop header for a loop counter (anadd rec w/step of one)
827/// suitable for use by LFTR. If multiple counters are available, select the
828/// "best" one based profitable heuristics.
829///
830/// BECount may be an i8* pointer type. The pointer difference is already
831/// valid count without scaling the address stride, so it remains a pointer
832/// expression as far as SCEV is concerned.
833static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
834 const SCEV *BECount,
836 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
837
838 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
839
840 // Loop over all of the PHI nodes, looking for a simple counter.
841 PHINode *BestPhi = nullptr;
842 const SCEV *BestInit = nullptr;
843 BasicBlock *LatchBlock = L->getLoopLatch();
844 assert(LatchBlock && "Must be in simplified form");
845 const DataLayout &DL = L->getHeader()->getDataLayout();
846
847 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
848 PHINode *Phi = cast<PHINode>(I);
849 if (!isLoopCounter(Phi, L, SE))
850 continue;
851
852 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
853
854 // AR may be a pointer type, while BECount is an integer type.
855 // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
856 // AR may not be a narrower type, or we may never exit.
857 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
858 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
859 continue;
860
861 // Avoid reusing a potentially undef value to compute other values that may
862 // have originally had a concrete definition.
863 if (!hasConcreteDef(Phi)) {
864 // We explicitly allow unknown phis as long as they are already used by
865 // the loop exit test. This is legal since performing LFTR could not
866 // increase the number of undef users.
867 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
868 if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
869 !isLoopExitTestBasedOn(IncPhi, ExitingBB))
870 continue;
871 }
872
873 // Avoid introducing undefined behavior due to poison which didn't exist in
874 // the original program. (Annoyingly, the rules for poison and undef
875 // propagation are distinct, so this does NOT cover the undef case above.)
876 // We have to ensure that we don't introduce UB by introducing a use on an
877 // iteration where said IV produces poison. Our strategy here differs for
878 // pointers and integer IVs. For integers, we strip and reinfer as needed,
879 // see code in linearFunctionTestReplace. For pointers, we restrict
880 // transforms as there is no good way to reinfer inbounds once lost.
881 if (!Phi->getType()->isIntegerTy() &&
882 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
883 continue;
884
885 const SCEV *Init = AR->getStart();
886
887 if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
888 // Don't force a live loop counter if another IV can be used.
889 if (isAlmostDeadIV(Phi, LatchBlock, Cond))
890 continue;
891
892 // Prefer to count-from-zero. This is a more "canonical" counter form. It
893 // also prefers integer to pointer IVs.
894 if (BestInit->isZero() != Init->isZero()) {
895 if (BestInit->isZero())
896 continue;
897 }
898 // If two IVs both count from zero or both count from nonzero then the
899 // narrower is likely a dead phi that has been widened. Use the wider phi
900 // to allow the other to be eliminated.
901 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
902 continue;
903 }
904 BestPhi = Phi;
905 BestInit = Init;
906 }
907 return BestPhi;
908}
909
910/// Insert an IR expression which computes the value held by the IV IndVar
911/// (which must be an loop counter w/unit stride) after the backedge of loop L
912/// is taken ExitCount times.
913static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
914 const SCEV *ExitCount, bool UsePostInc, Loop *L,
915 SCEVExpander &Rewriter, ScalarEvolution *SE) {
916 assert(isLoopCounter(IndVar, L, SE));
917 assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
918 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
919 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
920
921 // For integer IVs, truncate the IV before computing the limit unless we
922 // know apriori that the limit must be a constant when evaluated in the
923 // bitwidth of the IV. We prefer (potentially) keeping a truncate of the
924 // IV in the loop over a (potentially) expensive expansion of the widened
925 // exit count add(zext(add)) expression.
926 if (IndVar->getType()->isIntegerTy() &&
927 SE->getTypeSizeInBits(AR->getType()) >
928 SE->getTypeSizeInBits(ExitCount->getType())) {
929 const SCEV *IVInit = AR->getStart();
930 if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
931 AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
932 }
933
934 const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
935 const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
936 assert(SE->isLoopInvariant(IVLimit, L) &&
937 "Computed iteration count is not loop invariant!");
938 return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
939 ExitingBB->getTerminator());
940}
941
942/// This method rewrites the exit condition of the loop to be a canonical !=
943/// comparison against the incremented loop induction variable. This pass is
944/// able to rewrite the exit tests of any loop where the SCEV analysis can
945/// determine a loop-invariant trip count of the loop, which is actually a much
946/// broader range than just linear tests.
947bool IndVarSimplify::
948linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
949 const SCEV *ExitCount,
950 PHINode *IndVar, SCEVExpander &Rewriter) {
951 assert(L->getLoopLatch() && "Loop no longer in simplified form?");
952 assert(isLoopCounter(IndVar, L, SE));
953 Instruction * const IncVar =
954 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
955
956 // Initialize CmpIndVar to the preincremented IV.
957 Value *CmpIndVar = IndVar;
958 bool UsePostInc = false;
959
960 // If the exiting block is the same as the backedge block, we prefer to
961 // compare against the post-incremented value, otherwise we must compare
962 // against the preincremented value.
963 if (ExitingBB == L->getLoopLatch()) {
964 // For pointer IVs, we chose to not strip inbounds which requires us not
965 // to add a potentially UB introducing use. We need to either a) show
966 // the loop test we're modifying is already in post-inc form, or b) show
967 // that adding a use must not introduce UB.
968 bool SafeToPostInc =
969 IndVar->getType()->isIntegerTy() ||
970 isLoopExitTestBasedOn(IncVar, ExitingBB) ||
971 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
972 if (SafeToPostInc) {
973 UsePostInc = true;
974 CmpIndVar = IncVar;
975 }
976 }
977
978 // It may be necessary to drop nowrap flags on the incrementing instruction
979 // if either LFTR moves from a pre-inc check to a post-inc check (in which
980 // case the increment might have previously been poison on the last iteration
981 // only) or if LFTR switches to a different IV that was previously dynamically
982 // dead (and as such may be arbitrarily poison). We remove any nowrap flags
983 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
984 // check), because the pre-inc addrec flags may be adopted from the original
985 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
986 // TODO: This handling is inaccurate for one case: If we switch to a
987 // dynamically dead IV that wraps on the first loop iteration only, which is
988 // not covered by the post-inc addrec. (If the new IV was not dynamically
989 // dead, it could not be poison on the first iteration in the first place.)
990 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
991 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
992 if (BO->hasNoUnsignedWrap())
993 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
994 if (BO->hasNoSignedWrap())
995 BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
996 }
997
998 Value *ExitCnt = genLoopLimit(
999 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
1000 assert(ExitCnt->getType()->isPointerTy() ==
1001 IndVar->getType()->isPointerTy() &&
1002 "genLoopLimit missed a cast");
1003
1004 // Insert a new icmp_ne or icmp_eq instruction before the branch.
1005 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1007 if (L->contains(BI->getSuccessor(0)))
1008 P = ICmpInst::ICMP_NE;
1009 else
1010 P = ICmpInst::ICMP_EQ;
1011
1012 IRBuilder<> Builder(BI);
1013
1014 // The new loop exit condition should reuse the debug location of the
1015 // original loop exit condition.
1016 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
1017 Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1018
1019 // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1020 // avoid the expensive expansion of the limit expression in the wider type,
1021 // emit a truncate to narrow the IV to the ExitCount type. This is safe
1022 // since we know (from the exit count bitwidth), that we can't self-wrap in
1023 // the narrower type.
1024 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
1025 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
1026 if (CmpIndVarSize > ExitCntSize) {
1027 assert(!CmpIndVar->getType()->isPointerTy() &&
1028 !ExitCnt->getType()->isPointerTy());
1029
1030 // Before resorting to actually inserting the truncate, use the same
1031 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1032 // the other side of the comparison instead. We still evaluate the limit
1033 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1034 // a truncate within in.
1035 bool Extended = false;
1036 const SCEV *IV = SE->getSCEV(CmpIndVar);
1037 const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
1038 const SCEV *ZExtTrunc =
1039 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
1040
1041 if (ZExtTrunc == IV) {
1042 Extended = true;
1043 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
1044 "wide.trip.count");
1045 } else {
1046 const SCEV *SExtTrunc =
1047 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
1048 if (SExtTrunc == IV) {
1049 Extended = true;
1050 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
1051 "wide.trip.count");
1052 }
1053 }
1054
1055 if (Extended) {
1056 bool Discard;
1057 L->makeLoopInvariant(ExitCnt, Discard);
1058 } else
1059 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
1060 "lftr.wideiv");
1061 }
1062 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1063 << " LHS:" << *CmpIndVar << '\n'
1064 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1065 << "\n"
1066 << " RHS:\t" << *ExitCnt << "\n"
1067 << "ExitCount:\t" << *ExitCount << "\n"
1068 << " was: " << *BI->getCondition() << "\n");
1069
1070 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1071 Value *OrigCond = BI->getCondition();
1072 // It's tempting to use replaceAllUsesWith here to fully replace the old
1073 // comparison, but that's not immediately safe, since users of the old
1074 // comparison may not be dominated by the new comparison. Instead, just
1075 // update the branch to use the new comparison; in the common case this
1076 // will make old comparison dead.
1077 BI->setCondition(Cond);
1078 DeadInsts.emplace_back(OrigCond);
1079
1080 ++NumLFTR;
1081 return true;
1082}
1083
1084//===----------------------------------------------------------------------===//
1085// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1086//===----------------------------------------------------------------------===//
1087
1088/// If there's a single exit block, sink any loop-invariant values that
1089/// were defined in the preheader but not used inside the loop into the
1090/// exit block to reduce register pressure in the loop.
1091bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1092 BasicBlock *ExitBlock = L->getExitBlock();
1093 if (!ExitBlock) return false;
1094
1095 BasicBlock *Preheader = L->getLoopPreheader();
1096 if (!Preheader) return false;
1097
1098 bool MadeAnyChanges = false;
1099 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1100 BasicBlock::iterator I(Preheader->getTerminator());
1101 while (I != Preheader->begin()) {
1102 --I;
1103 // New instructions were inserted at the end of the preheader.
1104 if (isa<PHINode>(I))
1105 break;
1106
1107 // Don't move instructions which might have side effects, since the side
1108 // effects need to complete before instructions inside the loop. Also don't
1109 // move instructions which might read memory, since the loop may modify
1110 // memory. Note that it's okay if the instruction might have undefined
1111 // behavior: LoopSimplify guarantees that the preheader dominates the exit
1112 // block.
1113 if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1114 continue;
1115
1116 // Skip debug info intrinsics.
1117 if (isa<DbgInfoIntrinsic>(I))
1118 continue;
1119
1120 // Skip eh pad instructions.
1121 if (I->isEHPad())
1122 continue;
1123
1124 // Don't sink alloca: we never want to sink static alloca's out of the
1125 // entry block, and correctly sinking dynamic alloca's requires
1126 // checks for stacksave/stackrestore intrinsics.
1127 // FIXME: Refactor this check somehow?
1128 if (isa<AllocaInst>(I))
1129 continue;
1130
1131 // Determine if there is a use in or before the loop (direct or
1132 // otherwise).
1133 bool UsedInLoop = false;
1134 for (Use &U : I->uses()) {
1135 Instruction *User = cast<Instruction>(U.getUser());
1136 BasicBlock *UseBB = User->getParent();
1137 if (PHINode *P = dyn_cast<PHINode>(User)) {
1138 unsigned i =
1140 UseBB = P->getIncomingBlock(i);
1141 }
1142 if (UseBB == Preheader || L->contains(UseBB)) {
1143 UsedInLoop = true;
1144 break;
1145 }
1146 }
1147
1148 // If there is, the def must remain in the preheader.
1149 if (UsedInLoop)
1150 continue;
1151
1152 // Otherwise, sink it to the exit block.
1153 Instruction *ToMove = &*I;
1154 bool Done = false;
1155
1156 if (I != Preheader->begin()) {
1157 // Skip debug info intrinsics.
1158 do {
1159 --I;
1160 } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
1161
1162 if (I->isDebugOrPseudoInst() && I == Preheader->begin())
1163 Done = true;
1164 } else {
1165 Done = true;
1166 }
1167
1168 MadeAnyChanges = true;
1169 ToMove->moveBefore(*ExitBlock, InsertPt);
1170 SE->forgetValue(ToMove);
1171 if (Done) break;
1172 InsertPt = ToMove->getIterator();
1173 }
1174
1175 return MadeAnyChanges;
1176}
1177
1178static void replaceExitCond(BranchInst *BI, Value *NewCond,
1180 auto *OldCond = BI->getCondition();
1181 LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1182 << " with " << *NewCond << "\n");
1183 BI->setCondition(NewCond);
1184 if (OldCond->use_empty())
1185 DeadInsts.emplace_back(OldCond);
1186}
1187
1188static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1189 bool IsTaken) {
1190 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1191 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1192 auto *OldCond = BI->getCondition();
1193 return ConstantInt::get(OldCond->getType(),
1194 IsTaken ? ExitIfTrue : !ExitIfTrue);
1195}
1196
1197static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1199 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1200 auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1201 replaceExitCond(BI, NewCond, DeadInsts);
1202}
1203
1205 LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1206 ScalarEvolution &SE) {
1207 assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1208 auto *LoopPreheader = L->getLoopPreheader();
1209 auto *LoopHeader = L->getHeader();
1211 for (auto &PN : LoopHeader->phis()) {
1212 auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1213 for (User *U : PN.users())
1214 Worklist.push_back(cast<Instruction>(U));
1215 SE.forgetValue(&PN);
1216 PN.replaceAllUsesWith(PreheaderIncoming);
1217 DeadInsts.emplace_back(&PN);
1218 }
1219
1220 // Replacing with the preheader value will often allow IV users to simplify
1221 // (especially if the preheader value is a constant).
1223 while (!Worklist.empty()) {
1224 auto *I = cast<Instruction>(Worklist.pop_back_val());
1225 if (!Visited.insert(I).second)
1226 continue;
1227
1228 // Don't simplify instructions outside the loop.
1229 if (!L->contains(I))
1230 continue;
1231
1232 Value *Res = simplifyInstruction(I, I->getDataLayout());
1233 if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1234 for (User *U : I->users())
1235 Worklist.push_back(cast<Instruction>(U));
1236 I->replaceAllUsesWith(Res);
1237 DeadInsts.emplace_back(I);
1238 }
1239 }
1240}
1241
1242static Value *
1245 SCEVExpander &Rewriter) {
1246 ICmpInst::Predicate InvariantPred = LIP.Pred;
1247 BasicBlock *Preheader = L->getLoopPreheader();
1248 assert(Preheader && "Preheader doesn't exist");
1249 Rewriter.setInsertPoint(Preheader->getTerminator());
1250 auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1251 auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1252 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1253 if (ExitIfTrue)
1254 InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1255 IRBuilder<> Builder(Preheader->getTerminator());
1256 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1257 return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1258 BI->getCondition()->getName());
1259}
1260
1261static std::optional<Value *>
1262createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
1263 const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1264 ScalarEvolution *SE, SCEVExpander &Rewriter) {
1265 CmpPredicate Pred = ICmp->getCmpPredicate();
1266 Value *LHS = ICmp->getOperand(0);
1267 Value *RHS = ICmp->getOperand(1);
1268
1269 // 'LHS pred RHS' should now mean that we stay in loop.
1270 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1271 if (Inverted)
1273
1274 const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1275 const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1276 // Can we prove it to be trivially true or false?
1277 if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
1278 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);
1279
1280 auto *ARTy = LHSS->getType();
1281 auto *MaxIterTy = MaxIter->getType();
1282 // If possible, adjust types.
1283 if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1284 MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1285 else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1286 const SCEV *MinusOne = SE->getMinusOne(ARTy);
1287 const SCEV *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1288 if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1289 MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1290 }
1291
1292 if (SkipLastIter) {
1293 // Semantically skip last iter is "subtract 1, do not bother about unsigned
1294 // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1295 // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1296 // So we manually construct umin(a - 1, b - 1).
1298 if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
1299 for (const SCEV *Op : UMin->operands())
1300 Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
1301 MaxIter = SE->getUMinFromMismatchedTypes(Elements);
1302 } else
1303 MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
1304 }
1305
1306 // Check if there is a loop-invariant predicate equivalent to our check.
1307 auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1308 L, BI, MaxIter);
1309 if (!LIP)
1310 return std::nullopt;
1311
1312 // Can we prove it to be trivially true?
1313 if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1314 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
1315 else
1316 return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
1317}
1318
1320 const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
1321 bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1323 assert(
1324 (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
1325 "Not a loop exit!");
1326
1327 // For branch that stays in loop by TRUE condition, go through AND. For branch
1328 // that stays in loop by FALSE condition, go through OR. Both gives the
1329 // similar logic: "stay in loop iff all conditions are true(false)".
1330 bool Inverted = L->contains(BI->getSuccessor(1));
1331 SmallVector<ICmpInst *, 4> LeafConditions;
1332 SmallVector<Value *, 4> Worklist;
1334 Value *OldCond = BI->getCondition();
1335 Visited.insert(OldCond);
1336 Worklist.push_back(OldCond);
1337
1338 auto GoThrough = [&](Value *V) {
1339 Value *LHS = nullptr, *RHS = nullptr;
1340 if (Inverted) {
1341 if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1342 return false;
1343 } else {
1344 if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1345 return false;
1346 }
1347 if (Visited.insert(LHS).second)
1348 Worklist.push_back(LHS);
1349 if (Visited.insert(RHS).second)
1350 Worklist.push_back(RHS);
1351 return true;
1352 };
1353
1354 do {
1355 Value *Curr = Worklist.pop_back_val();
1356 // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1357 // those with one use, to avoid instruction duplication.
1358 if (Curr->hasOneUse())
1359 if (!GoThrough(Curr))
1360 if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
1361 LeafConditions.push_back(ICmp);
1362 } while (!Worklist.empty());
1363
1364 // If the current basic block has the same exit count as the whole loop, and
1365 // it consists of multiple icmp's, try to collect all icmp's that give exact
1366 // same exit count. For all other icmp's, we could use one less iteration,
1367 // because their value on the last iteration doesn't really matter.
1368 SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
1369 if (!SkipLastIter && LeafConditions.size() > 1 &&
1370 SE->getExitCount(L, ExitingBB,
1371 ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
1372 MaxIter)
1373 for (auto *ICmp : LeafConditions) {
1374 auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
1375 /*ControlsExit*/ false);
1376 const SCEV *ExitMax = EL.SymbolicMaxNotTaken;
1377 if (isa<SCEVCouldNotCompute>(ExitMax))
1378 continue;
1379 // They could be of different types (specifically this happens after
1380 // IV widening).
1381 auto *WiderType =
1382 SE->getWiderType(ExitMax->getType(), MaxIter->getType());
1383 const SCEV *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
1384 const SCEV *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
1385 if (WideExitMax == WideMaxIter)
1386 ICmpsFailingOnLastIter.insert(ICmp);
1387 }
1388
1389 bool Changed = false;
1390 for (auto *OldCond : LeafConditions) {
1391 // Skip last iteration for this icmp under one of two conditions:
1392 // - We do it for all conditions;
1393 // - There is another ICmp that would fail on last iter, so this one doesn't
1394 // really matter.
1395 bool OptimisticSkipLastIter = SkipLastIter;
1396 if (!OptimisticSkipLastIter) {
1397 if (ICmpsFailingOnLastIter.size() > 1)
1398 OptimisticSkipLastIter = true;
1399 else if (ICmpsFailingOnLastIter.size() == 1)
1400 OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
1401 }
1402 if (auto Replaced =
1403 createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
1404 OptimisticSkipLastIter, SE, Rewriter)) {
1405 Changed = true;
1406 auto *NewCond = *Replaced;
1407 if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
1408 NCI->setName(OldCond->getName() + ".first_iter");
1409 }
1410 LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1411 << " with " << *NewCond << "\n");
1412 assert(OldCond->hasOneUse() && "Must be!");
1413 OldCond->replaceAllUsesWith(NewCond);
1414 DeadInsts.push_back(OldCond);
1415 // Make sure we no longer consider this condition as failing on last
1416 // iteration.
1417 ICmpsFailingOnLastIter.erase(OldCond);
1418 }
1419 }
1420 return Changed;
1421}
1422
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags. The alternative to this would be to extending the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers. The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  // Pass 1: turn a signed compare of a zext into the equivalent unsigned
  // compare whenever the invariant side provably fits the zero-extended range.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning cache with sub-optimal results. For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed. We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions. The exception is when we have an zext(add-rec). The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant. This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS = CastInst::Create(
          Instruction::Trunc, RHS, LHSOp->getType(), "",
          L->getLoopPreheader()->getTerminator()->getIterator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      // Samesign flag cannot be preserved after narrowing the compare.
      ICmp->setSameSign(false);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };

    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}
1555
1556bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1557 SmallVector<BasicBlock*, 16> ExitingBlocks;
1558 L->getExitingBlocks(ExitingBlocks);
1559
1560 // Remove all exits which aren't both rewriteable and execute on every
1561 // iteration.
1562 llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1563 // If our exitting block exits multiple loops, we can only rewrite the
1564 // innermost one. Otherwise, we're changing how many times the innermost
1565 // loop runs before it exits.
1566 if (LI->getLoopFor(ExitingBB) != L)
1567 return true;
1568
1569 // Can't rewrite non-branch yet.
1570 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1571 if (!BI)
1572 return true;
1573
1574 // Likewise, the loop latch must be dominated by the exiting BB.
1575 if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1576 return true;
1577
1578 if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1579 // If already constant, nothing to do. However, if this is an
1580 // unconditional exit, we can still replace header phis with their
1581 // preheader value.
1582 if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1583 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1584 return true;
1585 }
1586
1587 return false;
1588 });
1589
1590 if (ExitingBlocks.empty())
1591 return false;
1592
1593 // Get a symbolic upper bound on the loop backedge taken count.
1594 const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1595 if (isa<SCEVCouldNotCompute>(MaxBECount))
1596 return false;
1597
1598 // Visit our exit blocks in order of dominance. We know from the fact that
1599 // all exits must dominate the latch, so there is a total dominance order
1600 // between them.
1601 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1602 // std::sort sorts in ascending order, so we want the inverse of
1603 // the normal dominance relation.
1604 if (A == B) return false;
1605 if (DT->properlyDominates(A, B))
1606 return true;
1607 else {
1608 assert(DT->properlyDominates(B, A) &&
1609 "expected total dominance order!");
1610 return false;
1611 }
1612 });
1613#ifdef ASSERT
1614 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1615 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1616 }
1617#endif
1618
1619 bool Changed = false;
1620 bool SkipLastIter = false;
1621 const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1622 auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1623 if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1624 return;
1625 if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1626 CurrMaxExit = MaxExitCount;
1627 else
1628 CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1629 // If the loop has more than 1 iteration, all further checks will be
1630 // executed 1 iteration less.
1631 if (CurrMaxExit == MaxBECount)
1632 SkipLastIter = true;
1633 };
1634 SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1635 for (BasicBlock *ExitingBB : ExitingBlocks) {
1636 const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1637 const SCEV *MaxExitCount = SE->getExitCount(
1638 L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1639 if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1640 // Okay, we do not know the exit count here. Can we at least prove that it
1641 // will remain the same within iteration space?
1642 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1643 auto OptimizeCond = [&](bool SkipLastIter) {
1644 return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1645 MaxBECount, SkipLastIter,
1646 SE, Rewriter, DeadInsts);
1647 };
1648
1649 // TODO: We might have proved that we can skip the last iteration for
1650 // this check. In this case, we only want to check the condition on the
1651 // pre-last iteration (MaxBECount - 1). However, there is a nasty
1652 // corner case:
1653 //
1654 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1655 //
1656 // If we could not prove that len != 0, then we also could not prove that
1657 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1658 // OptimizeCond will likely not prove anything for it, even if it could
1659 // prove the same fact for len.
1660 //
1661 // As a temporary solution, we query both last and pre-last iterations in
1662 // hope that we will be able to prove triviality for at least one of
1663 // them. We can stop querying MaxBECount for this case once SCEV
1664 // understands that (MaxBECount - 1) will not overflow here.
1665 if (OptimizeCond(false))
1666 Changed = true;
1667 else if (SkipLastIter && OptimizeCond(true))
1668 Changed = true;
1669 UpdateSkipLastIter(MaxExitCount);
1670 continue;
1671 }
1672
1673 UpdateSkipLastIter(ExactExitCount);
1674
1675 // If we know we'd exit on the first iteration, rewrite the exit to
1676 // reflect this. This does not imply the loop must exit through this
1677 // exit; there may be an earlier one taken on the first iteration.
1678 // We know that the backedge can't be taken, so we replace all
1679 // the header PHIs with values coming from the preheader.
1680 if (ExactExitCount->isZero()) {
1681 foldExit(L, ExitingBB, true, DeadInsts);
1682 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1683 Changed = true;
1684 continue;
1685 }
1686
1687 assert(ExactExitCount->getType()->isIntegerTy() &&
1688 MaxBECount->getType()->isIntegerTy() &&
1689 "Exit counts must be integers");
1690
1691 Type *WiderType =
1692 SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1693 ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1694 MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1695 assert(MaxBECount->getType() == ExactExitCount->getType());
1696
1697 // Can we prove that some other exit must be taken strictly before this
1698 // one?
1699 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1700 ExactExitCount)) {
1701 foldExit(L, ExitingBB, false, DeadInsts);
1702 Changed = true;
1703 continue;
1704 }
1705
1706 // As we run, keep track of which exit counts we've encountered. If we
1707 // find a duplicate, we've found an exit which would have exited on the
1708 // exiting iteration, but (from the visit order) strictly follows another
1709 // which does the same and is thus dead.
1710 if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1711 foldExit(L, ExitingBB, false, DeadInsts);
1712 Changed = true;
1713 continue;
1714 }
1715
1716 // TODO: There might be another opportunity to leverage SCEV's reasoning
1717 // here. If we kept track of the min of dominating exits so far, we could
1718 // discharge exits with EC >= MDEC. This is less powerful than the existing
1719 // transform (since later exits aren't considered), but potentially more
1720 // powerful for any case where SCEV can prove a >=u b, but neither a == b
1721 // or a >u b. Such a case is not currently known.
1722 }
1723 return Changed;
1724}
1725
/// Attempt to rewrite the condition of every predicateable exit of \p L into
/// a loop-invariant comparison of that exit's exact exit count against the
/// loop's exact backedge-taken count. This is only legal for read-only loops
/// (no side effects), since it may change *which* exit is taken (though not
/// how many observable iterations run). Returns true if any condition was
/// rewritten.
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration. Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow. We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  // Returns true if this exit's condition cannot legally be rewritten.
  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
      BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        !Rewriter.isSafeToExpand(ExitCount))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // Make sure all exits dominate the latch. This means there is a linear chain
  // of exits. We check this before sorting so we have a total order.
  BasicBlock *Latch = L->getLoopLatch();
  for (BasicBlock *ExitingBB : ExitingBlocks)
    if (!DT->dominates(ExitingBB, Latch))
      return false;

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b). Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known inequal.
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // llvm::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B)
      return false;
    if (DT->properlyDominates(A, B))
      return true;
    if (DT->properlyDominates(B, A))
      return false;
    llvm_unreachable("Should have total dominance order");
  });

  // Make sure our exit blocks are really a total order (i.e. a linear chain of
  // exits before the backedge).
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    assert(DT->dominates(ExitingBlocks[i - 1], ExitingBlocks[i]) &&
           "Not sorted by dominance");

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      // Keep only the dominating prefix of predicateable exits; everything
      // at or after the first bad exit must be left untouched.
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this? I can obviously bail out for outer
  // loops, but that seems less than ideal. MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO:isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks. A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch. Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic. We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      // This exit's count equals the whole loop's backedge-taken count, so
      // the exit can never be taken early; fold the branch to stay in-loop.
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        // Compare in a common wider type; exit counts are unsigned, so
        // zero-extension preserves the value.
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
    // Signal the pass wrapper that running SimpleLoopUnswitch afterwards is
    // likely profitable (see IndVarSimplifyPass::run).
    RunUnswitching = true;
  }

  return Changed;
}
1890
1891//===----------------------------------------------------------------------===//
1892// IndVarSimplify driver. Manage several subpasses of IV simplification.
1893//===----------------------------------------------------------------------===//
1894
/// Top-level driver: apply each IV-simplification subpass to \p L in
/// sequence (FP IV rewriting, IV simplification, exit-value rewriting,
/// congruent-IV elimination, exit optimization/predication, LFTR, and
/// cleanup). Returns true if the IR was changed.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop. Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter))  {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them. Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions. Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      if (!Rewriter.isSafeToExpand(ExitCount))
        continue;

      Changed |= linearFunctionTestReplace(L, ExitingBB,
                                           ExitCount, IndVar,
                                           Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  return Changed;
}
2057
2060 LPMUpdater &) {
2061 Function *F = L.getHeader()->getParent();
2062 const DataLayout &DL = F->getDataLayout();
2063
2064 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2065 WidenIndVars && AllowIVWidening);
2066 if (!IVS.run(&L))
2067 return PreservedAnalyses::all();
2068
2069 auto PA = getLoopPassPreservedAnalyses();
2070 PA.preserveSet<CFGAnalyses>();
2071 if (IVS.runUnswitching()) {
2073 PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2074 }
2075
2076 if (AR.MSSA)
2077 PA.preserve<MemorySSAAnalysis>();
2078 return PA;
2079}
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
static Value * genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, const SCEV *ExitCount, bool UsePostInc, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE)
Insert an IR expression which computes the value held by the IV IndVar (which must be a loop counter...
static void replaceExitCond(BranchInst *BI, Value *NewCond, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static cl::opt< bool > DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization"))
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB)
Whether the current loop exit test is based on this value.
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI)
Update information about the induction variable that is extended by this sign or zero extend operatio...
static void replaceLoopPHINodesWithPreheaderValues(LoopInfo *LI, Loop *L, SmallVectorImpl< WeakTrackingVH > &DeadInsts, ScalarEvolution &SE)
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB)
linearFunctionTestReplace policy.
static bool optimizeLoopExitWithUnknownExitCount(const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static Value * createInvariantCond(const Loop *L, BasicBlock *ExitingBB, const ScalarEvolution::LoopInvariantPredicate &LIP, SCEVExpander &Rewriter)
static bool isLoopCounter(PHINode *Phi, Loop *L, ScalarEvolution *SE)
Return true if the given phi is a "counter" in L.
static std::optional< Value * > createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB, const SCEV *MaxIter, bool Inverted, bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter)
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl< Value * > &Visited, unsigned Depth)
Recursive helper for hasConcreteDef().
static bool hasConcreteDef(Value *V)
Return true if the given value is concrete.
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, SmallVectorImpl< WeakTrackingVH > &DeadInsts)
static PHINode * getLoopPhiForCounter(Value *IncV, Loop *L)
Given a Value which is hoped to be part of an add recurrence in the given loop, return the associated...
static Constant * createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB, bool IsTaken)
static cl::opt< bool > UsePostIncrementRanges("indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true))
#define DEBUG_TYPE
static PHINode * FindLoopCounter(Loop *L, BasicBlock *ExitingBB, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT)
Search the loop header for a loop counter (an add rec w/step of one) suitable for use by LFTR.
static cl::opt< bool > AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), cl::desc("Allow widening of indvars to eliminate s/zext"))
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal)
Convert APF to an integer, if possible.
static cl::opt< bool > LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), cl::desc("Predicate conditions in read only loops"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
if(PassOpts->AAPipeline)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This pass exposes codegen information to IR-level passes.
Virtual Register Rewriter
Definition: VirtRegMap.cpp:261
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1326
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:461
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:530
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:437
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:240
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:444
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:608
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:702
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:703
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:679
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:688
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:677
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:678
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:700
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:687
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:681
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:684
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:685
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:680
@ ICMP_EQ
equal
Definition: InstrTypes.h:694
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:701
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:689
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:686
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:22
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:271
const APFloat & getValueAPF() const
Definition: Constants.h:314
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:126
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:219
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction compares its operands according to the predicate given to the constructor.
This instruction compares its operands according to the predicate given to the constructor.
CmpPredicate getCmpPredicate() const
CmpPredicate getInverseCmpPredicate() const
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2380
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705
Interface for visiting interesting IV users that are recognized but not simplified by this utility.
virtual void visitCast(CastInst *Cast)=0
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:511
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:310
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:508
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Class to represent integer types.
Definition: DerivedTypes.h:42
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:439
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:928
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:701
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const SCEVAddRecExpr * getPostIncExpr(ScalarEvolution &SE) const
Return an expression representing the value of this expression one iteration of the loop ahead.
This class uses information about analyze scalars to rewrite expressions in canonical form.
This class represents an analyzed expression in the program.
bool isOne() const
Return true if the expression is a constant one.
bool isZero() const
Return true if the expression is a constant zero.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents a cast from signed integer to floating point.
The main scalar evolution driver.
std::optional< LoopInvariantPredicate > getLoopInvariantExitCondDuringFirstIterations(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter)
If the result of the predicate LHS Pred RHS is loop invariant with respect to L at given Context duri...
Type * getWiderType(Type *Ty1, Type *Ty2) const
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates=false)
Compute the number of times the backedge of the specified loop will execute if its exit condition wer...
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
std::optional< bool > evaluatePredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Check whether the condition described by Pred, LHS, and RHS is true or false in the given Context.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may affect its v...
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getMinusOne(Type *Ty)
Return a SCEV for the constant -1 of a specific type.
const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
Promote the operands to the wider of the types using zero-extension, and then perform a umin operatio...
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
bool isKnownPredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:401
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1094
user_iterator_impl< User > user_iterator
Definition: Value.h:390
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclarationIfExists(Module *M, ID id, ArrayRef< Type * > Tys, FunctionType *FT=nullptr)
This version supports overloaded intrinsics.
Definition: Intrinsics.cpp:747
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, Instruction *OnPathTo, DominatorTree *DT)
Return true if undefined behavior would provably be executed on the path to OnPathTo if Root produced...
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:543
@ Done
Definition: Threading.h:60
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< unsigned > SCEVCheapExpansionBudget
std::pair< bool, bool > simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl< WeakTrackingVH > &Dead, SCEVExpander &Rewriter, IVVisitor *V=nullptr)
simplifyUsersOfIV - Simplify instructions that use this induction variable by using ScalarEvolution t...
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
TargetTransformInfo TTI
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:84
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the...
Definition: LoopUtils.cpp:470
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
Definition: LoopUtils.cpp:1549
bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition: Local.cpp:654
@ UnusedIndVarInLoop
Definition: LoopUtils.h:482
@ OnlyCheapRepl
Definition: LoopUtils.h:480
@ NeverRepl
Definition: LoopUtils.h:479
@ NoHardUse
Definition: LoopUtils.h:481
@ AlwaysRepl
Definition: LoopUtils.h:483
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A marker analysis to determine if SimpleLoopUnswitch should run again on a given loop.
Collect information about induction variables that are used by sign/zero extend operations.
PHINode * NarrowIV