1//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the implementation of the scalar evolution expander,
10// which is used to generate the code corresponding to a given scalar evolution
11// expression.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/ScopeExit.h"
23#include "llvm/IR/DataLayout.h"
24#include "llvm/IR/Dominators.h"
30
31#if LLVM_ENABLE_ABI_BREAKING_CHECKS
32#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
33#else
34#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
35#endif
36
37using namespace llvm;
38
39cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
40 "scev-cheap-expansion-budget", cl::Hidden, cl::init(4),
41 cl::desc("When performing SCEV expansion only if it is cheap to do, this "
42 "controls the budget that is considered cheap (default = 4)"));
43
44using namespace PatternMatch;
45using namespace SCEVPatternMatch;
46
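// PoisonFlags captures the poison-generating flags present on an instruction
// (nuw/nsw, exact, disjoint, nneg, samesign, GEP no-wrap) so that they can be
// re-applied later via PoisonFlags::apply after the instruction is reused in
// a different context.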
47PoisonFlags::PoisonFlags(const Instruction *I) {
48 NUW = false;
49 NSW = false;
50 Exact = false;
51 Disjoint = false;
52 NNeg = false;
53 SameSign = false;
54 GEPNW = GEPNoWrapFlags::none();
55 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
56 NUW = OBO->hasNoUnsignedWrap();
57 NSW = OBO->hasNoSignedWrap();
58 }
59 if (auto *PEO = dyn_cast<PossiblyExactOperator>(I))
60 Exact = PEO->isExact();
61 if (auto *PDI = dyn_cast<PossiblyDisjointInst>(I))
62 Disjoint = PDI->isDisjoint();
63 if (auto *PNI = dyn_cast<PossiblyNonNegInst>(I))
64 NNeg = PNI->hasNonNeg();
65 if (auto *TI = dyn_cast<TruncInst>(I)) {
66 NUW = TI->hasNoUnsignedWrap();
67 NSW = TI->hasNoSignedWrap();
68 }
69 if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
70 GEPNW = GEP->getNoWrapFlags();
71 if (auto *ICmp = dyn_cast<ICmpInst>(I))
72 SameSign = ICmp->hasSameSign();
73}
74
75void PoisonFlags::apply(Instruction *I) {
76 if (isa<OverflowingBinaryOperator>(I)) {
77 I->setHasNoUnsignedWrap(NUW);
78 I->setHasNoSignedWrap(NSW);
79 }
80 if (isa<PossiblyExactOperator>(I))
81 I->setIsExact(Exact);
82 if (auto *PDI = dyn_cast<PossiblyDisjointInst>(I))
83 PDI->setIsDisjoint(Disjoint);
84 if (auto *PNI = dyn_cast<PossiblyNonNegInst>(I))
85 PNI->setNonNeg(NNeg);
86 if (isa<TruncInst>(I)) {
87 I->setHasNoUnsignedWrap(NUW);
88 I->setHasNoSignedWrap(NSW);
89 }
90 if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
91 GEP->setNoWrapFlags(GEPNW);
92 if (auto *ICmp = dyn_cast<ICmpInst>(I))
93 ICmp->setSameSign(SameSign);
94}
95
96/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
97/// reusing an existing cast if a suitable one (= dominating IP) exists, or
98/// creating a new one.
99Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
100 Instruction::CastOps Op,
101 BasicBlock::iterator IP) {
102 // This function must be called with the builder having a valid insertion
103 // point. It doesn't need to be the actual IP where the uses of the returned
104 // cast will be added, but it must dominate such IP.
105 // We use this precondition to produce a cast that will dominate all its
106 // uses. In particular, this is crucial for the case where the builder's
107 // insertion point *is* the point where we were asked to put the cast.
108 // Since we don't know the builder's insertion point is actually
109 // where the uses will be added (only that it dominates it), we are
110 // not allowed to move it.
111 BasicBlock::iterator BIP = Builder.GetInsertPoint();
112
113 Value *Ret = nullptr;
114
115 if (!isa<Constant>(V)) {
116 // Check to see if there is already a cast!
117 for (User *U : V->users()) {
118 if (U->getType() != Ty)
119 continue;
120 CastInst *CI = dyn_cast<CastInst>(U);
121 if (!CI || CI->getOpcode() != Op)
122 continue;
123
124 // Found a suitable cast that is at IP or comes before IP. Use it. Note
125 // that the cast must also properly dominate the Builder's insertion
126 // point.
127 if (IP->getParent() == CI->getParent() && &*BIP != CI &&
128 (&*IP == CI || CI->comesBefore(&*IP))) {
129 Ret = CI;
130 break;
131 }
132 }
133 }
134
135 // Create a new cast.
136 if (!Ret) {
137 SCEVInsertPointGuard Guard(Builder, this);
138 Builder.SetInsertPoint(&*IP);
139 Ret = Builder.CreateCast(Op, V, Ty, V->getName());
140 }
141
142 // We assert at the end of the function since IP might point to an
143 // instruction with different dominance properties than a cast
144 // (an invoke for example) and not dominate BIP (but the cast does).
145 assert(!isa<Instruction>(Ret) ||
146 SE.DT.dominates(cast<Instruction>(Ret), &*BIP));
147
148 return Ret;
149}
150
151BasicBlock::iterator
152SCEVExpander::findInsertPointAfter(Instruction *I,
153 Instruction *MustDominate) const {
154 BasicBlock::iterator IP = ++I->getIterator();
155 if (auto *II = dyn_cast<InvokeInst>(I))
156 IP = II->getNormalDest()->begin();
157
158 while (isa<PHINode>(IP))
159 ++IP;
160
161 if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
162 ++IP;
163 } else if (isa<CatchSwitchInst>(IP)) {
164 IP = MustDominate->getParent()->getFirstInsertionPt();
165 } else {
166 assert(!IP->isEHPad() && "unexpected eh pad!");
167 }
168
169 // Adjust insert point to be after instructions inserted by the expander, so
170 // we can re-use already inserted instructions. Avoid skipping past the
171 // original \p MustDominate, in case it is an inserted instruction.
172 while (isInsertedInstruction(&*IP) && &*IP != MustDominate)
173 ++IP;
174
175 return IP;
176}
177
178BasicBlock::iterator
179SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
180 // Cast the argument at the beginning of the entry block, after
181 // any bitcasts of other arguments.
182 if (Argument *A = dyn_cast<Argument>(V)) {
183 BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
184 while ((isa<BitCastInst>(IP) &&
185 isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
186 cast<BitCastInst>(IP)->getOperand(0) != A))
187 ++IP;
188 return IP;
189 }
190
191 // Cast the instruction immediately after the instruction.
192 if (Instruction *I = dyn_cast<Instruction>(V))
193 return findInsertPointAfter(I, &*Builder.GetInsertPoint());
194
195 // Otherwise, this must be some kind of a constant,
196 // so let's plop this cast into the function's entry block.
197 assert(isa<Constant>(V) &&
198 "Expected the cast argument to be a global/constant");
199 return Builder.GetInsertBlock()
200 ->getParent()
201 ->getEntryBlock()
202 .begin();
203}
204
205/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
206/// which must be possible with a noop cast, doing what we can to share
207/// the casts.
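/// For illustration (assuming a 64-bit target): converting an i64 %x into a
/// pointer uses 'inttoptr i64 %x to ptr', or 'getelementptr i8, ptr null,
/// i64 %x' for a non-integral address space; sizes must already match, so no
/// widening or truncating cast is ever emitted here.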
208Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
209 Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
210 assert((Op == Instruction::BitCast ||
211 Op == Instruction::PtrToInt ||
212 Op == Instruction::IntToPtr) &&
213 "InsertNoopCastOfTo cannot perform non-noop casts!");
214 assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
215 "InsertNoopCastOfTo cannot change sizes!");
216
217 // inttoptr only works for integral pointers. For non-integral pointers, we
218 // can create a GEP on null with the integral value as index. Note that
219 // it is safe to use GEP of null instead of inttoptr here, because only
220 // expressions already based on a GEP of null should be converted to pointers
221 // during expansion.
222 if (Op == Instruction::IntToPtr) {
223 auto *PtrTy = cast<PointerType>(Ty);
224 if (DL.isNonIntegralPointerType(PtrTy))
225 return Builder.CreatePtrAdd(Constant::getNullValue(PtrTy), V, "scevgep");
226 }
227 // Short-circuit unnecessary bitcasts.
228 if (Op == Instruction::BitCast) {
229 if (V->getType() == Ty)
230 return V;
231 if (CastInst *CI = dyn_cast<CastInst>(V)) {
232 if (CI->getOperand(0)->getType() == Ty)
233 return CI->getOperand(0);
234 }
235 }
236 // Short-circuit unnecessary inttoptr<->ptrtoint casts.
237 if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
238 SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
239 if (CastInst *CI = dyn_cast<CastInst>(V))
240 if ((CI->getOpcode() == Instruction::PtrToInt ||
241 CI->getOpcode() == Instruction::IntToPtr) &&
242 SE.getTypeSizeInBits(CI->getType()) ==
243 SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
244 return CI->getOperand(0);
245 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
246 if ((CE->getOpcode() == Instruction::PtrToInt ||
247 CE->getOpcode() == Instruction::IntToPtr) &&
248 SE.getTypeSizeInBits(CE->getType()) ==
249 SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
250 return CE->getOperand(0);
251 }
252
253 // Fold a cast of a constant.
254 if (Constant *C = dyn_cast<Constant>(V))
255 return ConstantExpr::getCast(Op, C, Ty);
256
257 // Try to reuse existing cast, or insert one.
258 return ReuseOrCreateCast(V, Ty, Op, GetOptimalInsertionPointForCastOf(V));
259}
260
261/// InsertBinop - Insert the specified binary operator, doing a small amount
262/// of work to avoid inserting an obviously redundant operation, and hoisting
263/// to an outer loop when the opportunity is there and it is safe.
264Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
265 Value *LHS, Value *RHS,
266 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
267 // Fold a binop with constant operands.
268 if (Constant *CLHS = dyn_cast<Constant>(LHS))
269 if (Constant *CRHS = dyn_cast<Constant>(RHS))
270 if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, DL))
271 return Res;
272
273 // Do a quick scan to see if we have this binop nearby. If so, reuse it.
274 unsigned ScanLimit = 6;
275 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
276 // Scanning starts from the last instruction before the insertion point.
277 BasicBlock::iterator IP = Builder.GetInsertPoint();
278 if (IP != BlockBegin) {
279 --IP;
280 for (; ScanLimit; --IP, --ScanLimit) {
281 auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
282 // Ensure that no-wrap flags match.
283 if (isa<OverflowingBinaryOperator>(I)) {
284 if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
285 return true;
286 if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
287 return true;
288 }
289 // Conservatively, do not use any instruction which has any of exact
290 // flags installed.
291 if (isa<PossiblyExactOperator>(I) && I->isExact())
292 return true;
293 return false;
294 };
295 if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
296 IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
297 return &*IP;
298 if (IP == BlockBegin) break;
299 }
300 }
301
302 // Save the original insertion point so we can restore it when we're done.
303 DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
304 SCEVInsertPointGuard Guard(Builder, this);
305
306 if (IsSafeToHoist) {
307 // Move the insertion point out of as many loops as we can.
308 while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
309 if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
310 BasicBlock *Preheader = L->getLoopPreheader();
311 if (!Preheader) break;
312
313 // Ok, move up a level.
314 Builder.SetInsertPoint(Preheader->getTerminator());
315 }
316 }
317
318 // If we haven't found this binop, insert it.
319 // TODO: Use the Builder, which will make CreateBinOp below fold with
320 // InstSimplifyFolder.
321 Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS));
322 BO->setDebugLoc(Loc);
323 if (Flags & SCEV::FlagNUW)
324 BO->setHasNoUnsignedWrap();
325 if (Flags & SCEV::FlagNSW)
326 BO->setHasNoSignedWrap();
327
328 return BO;
329}
330
331/// expandAddToGEP - Expand an addition expression with a pointer type into
332/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
333/// BasicAliasAnalysis and other passes analyze the result. See the rules
334/// for getelementptr vs. inttoptr in
335/// http://llvm.org/docs/LangRef.html#pointeraliasing
336/// for details.
337///
338/// Design note: The correctness of using getelementptr here depends on
339/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
340/// they may introduce pointer arithmetic which may not be safely converted
341/// into getelementptr.
342///
343/// Design note: It might seem desirable for this function to be more
344/// loop-aware. If some of the indices are loop-invariant while others
345/// aren't, it might seem desirable to emit multiple GEPs, keeping the
346/// loop-invariant portions of the overall computation outside the loop.
347/// However, there are a few reasons this is not done here. Hoisting simple
348/// arithmetic is a low-level optimization that often isn't very
349/// important until late in the optimization process. In fact, passes
350/// like InstructionCombining will combine GEPs, even if it means
351/// pushing loop-invariant computation down into loops, so even if the
352/// GEPs were split here, the work would quickly be undone. The
353/// LoopStrengthReduction pass, which is usually run quite late (and
354/// after the last InstructionCombining pass), takes care of hoisting
355/// loop-invariant portions of expressions, after considering what
356/// can be folded using target addressing modes.
357///
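/// For illustration (value names invented): adding an expanded offset %off to
/// a pointer %base is emitted as
///   %scevgep = getelementptr i8, ptr %base, i64 %off
/// rather than as a ptrtoint/add/inttoptr sequence.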
358Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V,
359 SCEV::NoWrapFlags Flags) {
360 assert(!isa<Instruction>(V) ||
361 SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
362
363 Value *Idx = expand(Offset);
364 GEPNoWrapFlags NW = (Flags & SCEV::FlagNUW) ? GEPNoWrapFlags::noUnsignedWrap()
365 : GEPNoWrapFlags::none();
366
367 // Fold a GEP with constant operands.
368 if (Constant *CLHS = dyn_cast<Constant>(V))
369 if (Constant *CRHS = dyn_cast<Constant>(Idx))
370 return Builder.CreatePtrAdd(CLHS, CRHS, "", NW);
371
372 // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
373 unsigned ScanLimit = 6;
374 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
375 // Scanning starts from the last instruction before the insertion point.
376 BasicBlock::iterator IP = Builder.GetInsertPoint();
377 if (IP != BlockBegin) {
378 --IP;
379 for (; ScanLimit; --IP, --ScanLimit) {
380 if (auto *GEP = dyn_cast<GetElementPtrInst>(IP)) {
381 if (GEP->getPointerOperand() == V &&
382 GEP->getSourceElementType() == Builder.getInt8Ty() &&
383 GEP->getOperand(1) == Idx) {
384 rememberFlags(GEP);
385 GEP->setNoWrapFlags(GEP->getNoWrapFlags() & NW);
386 return &*IP;
387 }
388 }
389 if (IP == BlockBegin) break;
390 }
391 }
392
393 // Save the original insertion point so we can restore it when we're done.
394 SCEVInsertPointGuard Guard(Builder, this);
395
396 // Move the insertion point out of as many loops as we can.
397 while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
398 if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
399 BasicBlock *Preheader = L->getLoopPreheader();
400 if (!Preheader) break;
401
402 // Ok, move up a level.
403 Builder.SetInsertPoint(Preheader->getTerminator());
404 }
405
406 // Emit a GEP.
407 return Builder.CreatePtrAdd(V, Idx, "scevgep", NW);
408}
409
410/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
411/// SCEV expansion. If they are nested, this is the most nested. If they are
412/// neighboring, pick the later one.
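/// For example, given an outer loop L1 containing an inner loop L2, the most
/// relevant of the two is L2; for two sibling loops, the one whose header is
/// dominated by the other's header (i.e. the later one) is chosen.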
413static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
414 DominatorTree &DT) {
415 if (!A) return B;
416 if (!B) return A;
417 if (A->contains(B)) return B;
418 if (B->contains(A)) return A;
419 if (DT.dominates(A->getHeader(), B->getHeader())) return B;
420 if (DT.dominates(B->getHeader(), A->getHeader())) return A;
421 return A; // Arbitrarily break the tie.
422}
423
424/// getRelevantLoop - Get the most relevant loop associated with the given
425/// expression, according to PickMostRelevantLoop.
426const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
427 // Test whether we've already computed the most relevant loop for this SCEV.
428 auto Pair = RelevantLoops.try_emplace(S);
429 if (!Pair.second)
430 return Pair.first->second;
431
432 switch (S->getSCEVType()) {
433 case scConstant:
434 case scVScale:
435 return nullptr; // A constant has no relevant loops.
436 case scTruncate:
437 case scZeroExtend:
438 case scSignExtend:
439 case scPtrToInt:
440 case scAddExpr:
441 case scMulExpr:
442 case scUDivExpr:
443 case scAddRecExpr:
444 case scUMaxExpr:
445 case scSMaxExpr:
446 case scUMinExpr:
447 case scSMinExpr:
448 case scSequentialUMinExpr: {
449 const Loop *L = nullptr;
450 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
451 L = AR->getLoop();
452 for (const SCEV *Op : S->operands())
453 L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
454 return RelevantLoops[S] = L;
455 }
456 case scUnknown: {
457 const SCEVUnknown *U = cast<SCEVUnknown>(S);
458 if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
459 return Pair.first->second = SE.LI.getLoopFor(I->getParent());
460 // A non-instruction has no relevant loops.
461 return nullptr;
462 }
463 case scCouldNotCompute:
464 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
465 }
466 llvm_unreachable("Unexpected SCEV type!");
467}
468
469namespace {
470
471/// LoopCompare - Compare loops by PickMostRelevantLoop.
472class LoopCompare {
473 DominatorTree &DT;
474public:
475 explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
476
477 bool operator()(std::pair<const Loop *, const SCEV *> LHS,
478 std::pair<const Loop *, const SCEV *> RHS) const {
479 // Keep pointer operands sorted at the end.
480 if (LHS.second->getType()->isPointerTy() !=
481 RHS.second->getType()->isPointerTy())
482 return LHS.second->getType()->isPointerTy();
483
484 // Compare loops with PickMostRelevantLoop.
485 if (LHS.first != RHS.first)
486 return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
487
488 // If one operand is a non-constant negative and the other is not,
489 // put the non-constant negative on the right so that a sub can
490 // be used instead of a negate and add.
491 if (LHS.second->isNonConstantNegative()) {
492 if (!RHS.second->isNonConstantNegative())
493 return false;
494 } else if (RHS.second->isNonConstantNegative())
495 return true;
496
497 // Otherwise they are equivalent according to this comparison.
498 return false;
499 }
500};
501
502}
503
504Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
505 // Recognize the canonical representation of an unsimplified urem.
506 const SCEV *URemLHS = nullptr;
507 const SCEV *URemRHS = nullptr;
508 if (SE.matchURem(S, URemLHS, URemRHS)) {
509 Value *LHS = expand(URemLHS);
510 Value *RHS = expand(URemRHS);
511 return InsertBinop(Instruction::URem, LHS, RHS, SCEV::FlagAnyWrap,
512 /*IsSafeToHoist*/ false);
513 }
514
515 // Collect all the add operands in a loop, along with their associated loops.
516 // Iterate in reverse so that constants are emitted last, all else equal, and
517 // so that pointer operands are inserted first, which the code below relies on
518 // to form more involved GEPs.
519 SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
520 for (const SCEV *Op : reverse(S->operands()))
521 OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
522
523 // Sort by loop. Use a stable sort so that constants follow non-constants and
524 // pointer operands precede non-pointer operands.
525 llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
526
527 // Emit instructions to add all the operands. Hoist as much as possible
528 // out of loops, and form meaningful getelementptrs where possible.
529 Value *Sum = nullptr;
530 for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
531 const Loop *CurLoop = I->first;
532 const SCEV *Op = I->second;
533 if (!Sum) {
534 // This is the first operand. Just expand it.
535 Sum = expand(Op);
536 ++I;
537 continue;
538 }
539
540 assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
541 if (isa<PointerType>(Sum->getType())) {
542 // The running sum expression is a pointer. Try to form a getelementptr
543 // at this level with that as the base.
544 SmallVector<const SCEV *, 4> NewOps;
545 for (; I != E && I->first == CurLoop; ++I) {
546 // If the operand is a SCEVUnknown that is not an instruction, peek through
547 // it, to enable more of it to be folded into the GEP.
548 const SCEV *X = I->second;
549 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
550 if (!isa<Instruction>(U->getValue()))
551 X = SE.getSCEV(U->getValue());
552 NewOps.push_back(X);
553 }
554 Sum = expandAddToGEP(SE.getAddExpr(NewOps), Sum, S->getNoWrapFlags());
555 } else if (Op->isNonConstantNegative()) {
556 // Instead of doing a negate and add, just do a subtract.
557 Value *W = expand(SE.getNegativeSCEV(Op));
558 Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
559 /*IsSafeToHoist*/ true);
560 ++I;
561 } else {
562 // A simple add.
563 Value *W = expand(Op);
564 // Canonicalize a constant to the RHS.
565 if (isa<Constant>(Sum))
566 std::swap(Sum, W);
567 Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
568 /*IsSafeToHoist*/ true);
569 ++I;
570 }
571 }
572
573 return Sum;
574}
575
576Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
577 Type *Ty = S->getType();
578
579 // Collect all the mul operands in a loop, along with their associated loops.
580 // Iterate in reverse so that constants are emitted last, all else equal.
581 SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
582 for (const SCEV *Op : reverse(S->operands()))
583 OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
584
585 // Sort by loop. Use a stable sort so that constants follow non-constants.
586 llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
587
588 // Emit instructions to mul all the operands. Hoist as much as possible
589 // out of loops.
590 Value *Prod = nullptr;
591 auto I = OpsAndLoops.begin();
592
593 // Expand the calculation of X pow N in the following manner:
594 // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
595 // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
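 // For illustration: with N = 11 = 8 + 2 + 1, the code below computes X^2,
 // X^4 and X^8 by repeated squaring and multiplies X, X^2 and X^8 together,
 // using O(log N) multiplies instead of N - 1.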
596 const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() {
597 auto E = I;
598 // Calculate how many times the same operand from the same loop is included
599 // into this power.
600 uint64_t Exponent = 0;
601 const uint64_t MaxExponent = UINT64_MAX >> 1;
602 // No one sane will ever try to calculate such huge exponents, but if we
603 // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
604 // below when the power of 2 exceeds our Exponent, and we want it to be
605 // 1u << 31 at most to not deal with unsigned overflow.
606 while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
607 ++Exponent;
608 ++E;
609 }
610 assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");
611
612 // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
613 // that are needed into the result.
614 Value *P = expand(I->second);
615 Value *Result = nullptr;
616 if (Exponent & 1)
617 Result = P;
618 for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
619 P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
620 /*IsSafeToHoist*/ true);
621 if (Exponent & BinExp)
622 Result = Result ? InsertBinop(Instruction::Mul, Result, P,
623 SCEV::FlagAnyWrap,
624 /*IsSafeToHoist*/ true)
625 : P;
626 }
627
628 I = E;
629 assert(Result && "Nothing was expanded?");
630 return Result;
631 };
632
633 while (I != OpsAndLoops.end()) {
634 if (!Prod) {
635 // This is the first operand. Just expand it.
636 Prod = ExpandOpBinPowN();
637 } else if (I->second->isAllOnesValue()) {
638 // Instead of doing a multiply by negative one, just do a negate.
639 Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
640 SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
641 ++I;
642 } else {
643 // A simple mul.
644 Value *W = ExpandOpBinPowN();
645 // Canonicalize a constant to the RHS.
646 if (isa<Constant>(Prod)) std::swap(Prod, W);
647 const APInt *RHS;
648 if (match(W, m_Power2(RHS))) {
649 // Canonicalize Prod*(1<<C) to Prod<<C.
650 assert(!Ty->isVectorTy() && "vector types are not SCEVable");
651 auto NWFlags = S->getNoWrapFlags();
652 // clear nsw flag if shl will produce poison value.
653 if (RHS->logBase2() == RHS->getBitWidth() - 1)
654 NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
655 Prod = InsertBinop(Instruction::Shl, Prod,
656 ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
657 /*IsSafeToHoist*/ true);
658 } else {
659 Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
660 /*IsSafeToHoist*/ true);
661 }
662 }
663 }
664
665 return Prod;
666}
667
668Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
669 Value *LHS = expand(S->getLHS());
670 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
671 const APInt &RHS = SC->getAPInt();
672 if (RHS.isPowerOf2())
673 return InsertBinop(Instruction::LShr, LHS,
674 ConstantInt::get(SC->getType(), RHS.logBase2()),
675 SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
676 }
677
678 const SCEV *RHSExpr = S->getRHS();
679 Value *RHS = expand(RHSExpr);
680 if (SafeUDivMode) {
681 bool GuaranteedNotPoison =
682 ScalarEvolution::isGuaranteedNotToBePoison(RHSExpr);
683 if (!GuaranteedNotPoison)
684 RHS = Builder.CreateFreeze(RHS);
685
686 // We need an umax if either RHSExpr is not known to be zero, or if it is
687 // not guaranteed to be non-poison. In the latter case, the frozen poison may
688 // be 0.
689 if (!SE.isKnownNonZero(RHSExpr) || !GuaranteedNotPoison)
690 RHS = Builder.CreateIntrinsic(RHS->getType(), Intrinsic::umax,
691 {RHS, ConstantInt::get(RHS->getType(), 1)});
692 }
693 return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
694 /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
695}
696
697/// Determine if this is a well-behaved chain of instructions leading back to
698/// the PHI. If so, it may be reused by expanded expressions.
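/// For illustration (IR names invented), a well-behaved chain looks like
///   %iv      = phi i64 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i64 %iv, %step
/// where following operand 0 from the increment reaches the PHI without
/// passing through side-effecting instructions or non-bitcast casts.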
699bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
700 const Loop *L) {
701 if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
702 (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
703 return false;
704 // If any of the operands don't dominate the insert position, bail.
705 // Addrec operands are always loop-invariant, so this can only happen
706 // if there are instructions which haven't been hoisted.
707 if (L == IVIncInsertLoop) {
708 for (Use &Op : llvm::drop_begin(IncV->operands()))
709 if (Instruction *OInst = dyn_cast<Instruction>(Op))
710 if (!SE.DT.dominates(OInst, IVIncInsertPos))
711 return false;
712 }
713 // Advance to the next instruction.
714 IncV = dyn_cast<Instruction>(IncV->getOperand(0));
715 if (!IncV)
716 return false;
717
718 if (IncV->mayHaveSideEffects())
719 return false;
720
721 if (IncV == PN)
722 return true;
723
724 return isNormalAddRecExprPHI(PN, IncV, L);
725}
726
727/// getIVIncOperand returns an induction variable increment's induction
728/// variable operand.
729///
730/// If allowScale is set, any type of GEP is allowed as long as the nonIV
731/// operands dominate InsertPos.
732///
733/// If allowScale is not set, ensure that a GEP increment conforms to one of the
734/// simple patterns generated by getAddRecExprPHILiterally and
735/// expandAddToGEP. If the pattern isn't recognized, return NULL.
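/// For illustration (names invented): given the increment
///   %iv.next = getelementptr i8, ptr %iv, i64 %step
/// the returned IV operand is %iv, provided %step dominates InsertPos (and,
/// when allowScale is not set, the GEP uses the i8 element type shown above).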
736Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
737 Instruction *InsertPos,
738 bool allowScale) {
739 if (IncV == InsertPos)
740 return nullptr;
741
742 switch (IncV->getOpcode()) {
743 default:
744 return nullptr;
745 // Check for a simple Add/Sub or GEP of a loop invariant step.
746 case Instruction::Add:
747 case Instruction::Sub: {
748 Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
749 if (!OInst || SE.DT.dominates(OInst, InsertPos))
750 return dyn_cast<Instruction>(IncV->getOperand(0));
751 return nullptr;
752 }
753 case Instruction::BitCast:
754 return dyn_cast<Instruction>(IncV->getOperand(0));
755 case Instruction::GetElementPtr:
756 for (Use &U : llvm::drop_begin(IncV->operands())) {
757 if (isa<Constant>(U))
758 continue;
759 if (Instruction *OInst = dyn_cast<Instruction>(U)) {
760 if (!SE.DT.dominates(OInst, InsertPos))
761 return nullptr;
762 }
763 if (allowScale) {
764 // allow any kind of GEP as long as it can be hoisted.
765 continue;
766 }
767 // GEPs produced by SCEVExpander use i8 element type.
768 if (!cast<GEPOperator>(IncV)->getSourceElementType()->isIntegerTy(8))
769 return nullptr;
770 break;
771 }
772 return dyn_cast<Instruction>(IncV->getOperand(0));
773 }
774}
775
776/// If the insert point of the current builder or any of the builders on the
777/// stack of saved builders has 'I' as its insert point, update it to point to
778/// the instruction after 'I'. This is intended to be used when the instruction
779/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
780/// different block, the inconsistent insert point (with a mismatched
781/// Instruction and Block) can lead to an instruction being inserted in a block
782/// other than its parent.
783void SCEVExpander::fixupInsertPoints(Instruction *I) {
784 BasicBlock::iterator It(*I);
785 BasicBlock::iterator NewInsertPt = std::next(It);
786 if (Builder.GetInsertPoint() == It)
787 Builder.SetInsertPoint(&*NewInsertPt);
788 for (auto *InsertPtGuard : InsertPointGuards)
789 if (InsertPtGuard->GetInsertPoint() == It)
790 InsertPtGuard->SetInsertPoint(NewInsertPt);
791}
792
793/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make
794/// it available to other uses in this loop. Recursively hoist any operands,
795/// until we reach a value that dominates InsertPos.
796bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos,
797 bool RecomputePoisonFlags) {
798 auto FixupPoisonFlags = [this](Instruction *I) {
799 // Drop flags that are potentially inferred from old context and infer flags
800 // in new context.
801 rememberFlags(I);
802 I->dropPoisonGeneratingFlags();
803 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
804 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
805 auto *BO = cast<BinaryOperator>(I);
806 BO->setHasNoUnsignedWrap(
807 ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
808 BO->setHasNoSignedWrap(
809 ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
810 }
811 };
812
813 if (SE.DT.dominates(IncV, InsertPos)) {
814 if (RecomputePoisonFlags)
815 FixupPoisonFlags(IncV);
816 return true;
817 }
818
819 // InsertPos must itself dominate IncV so that IncV's new position satisfies
820 // its existing users.
821 if (isa<PHINode>(InsertPos) ||
822 !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
823 return false;
824
825 if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
826 return false;
827
828 // Check that the chain of IV operands leading back to Phi can be hoisted.
829 SmallVector<Instruction*, 4> IVIncs;
830 for(;;) {
831 Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
832 if (!Oper)
833 return false;
834 // IncV is safe to hoist.
835 IVIncs.push_back(IncV);
836 IncV = Oper;
837 if (SE.DT.dominates(IncV, InsertPos))
838 break;
839 }
840 for (Instruction *I : llvm::reverse(IVIncs)) {
841 fixupInsertPoints(I);
842 I->moveBefore(InsertPos->getIterator());
843 if (RecomputePoisonFlags)
844 FixupPoisonFlags(I);
845 }
846 return true;
847}
848
849bool SCEVExpander::canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi,
850 PHINode *WidePhi,
851 Instruction *OrigInc,
852 Instruction *WideInc) {
853 return match(OrigInc, m_c_BinOp(m_Specific(OrigPhi), m_Value())) &&
854 match(WideInc, m_c_BinOp(m_Specific(WidePhi), m_Value())) &&
855 OrigInc->getOpcode() == WideInc->getOpcode();
856}
857
858/// Determine if this cyclic phi is in a form that would have been generated by
859/// LSR. We don't care if the phi was actually expanded in this pass, as long
860/// as it is in a low-cost form, for example, no implied multiplication. This
861/// should match any patterns generated by getAddRecExprPHILiterally and
862/// expandAddToGEP.
863bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
864 const Loop *L) {
865 for(Instruction *IVOper = IncV;
866 (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
867 /*allowScale=*/false));) {
868 if (IVOper == PN)
869 return true;
870 }
871 return false;
872}
873
874/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
875/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
876/// need to materialize IV increments elsewhere to handle difficult situations.
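/// For illustration: a pointer-typed PHI %pn is advanced with
///   %scevgep = getelementptr i8, ptr %pn, i64 %step
/// while an integer PHI uses 'add %pn, %step', or 'sub %pn, %step' when
/// useSubtract is set because the step was negated.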
877Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
878 bool useSubtract) {
879 Value *IncV;
880 // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
881 if (PN->getType()->isPointerTy()) {
882 // TODO: Change name to IVName.iv.next.
883 IncV = Builder.CreatePtrAdd(PN, StepV, "scevgep");
884 } else {
885 IncV = useSubtract ?
886 Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
887 Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
888 }
889 return IncV;
890}
891
892/// Check whether we can cheaply express the requested SCEV in terms of
893/// the available PHI SCEV by truncation and/or inversion of the step.
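/// For example, an i64 phi {0,+,1} can serve a requested i32 {0,+,1} via a
/// truncate, and a phi {0,+,1} can serve a requested {R,+,-1} by computing
/// R - phi (the InvertStep case).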
894static bool canBeCheaplyTransformed(ScalarEvolution &SE,
895 const SCEVAddRecExpr *Phi,
896 const SCEVAddRecExpr *Requested,
897 bool &InvertStep) {
898 // We can't transform to match a pointer PHI.
899 Type *PhiTy = Phi->getType();
900 Type *RequestedTy = Requested->getType();
901 if (PhiTy->isPointerTy() || RequestedTy->isPointerTy())
902 return false;
903
904 if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
905 return false;
906
907 // Try truncate it if necessary.
908 Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
909 if (!Phi)
910 return false;
911
912 // Check whether truncation will help.
913 if (Phi == Requested) {
914 InvertStep = false;
915 return true;
916 }
917
918 // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
919 if (SE.getMinusSCEV(Requested->getStart(), Requested) == Phi) {
920 InvertStep = true;
921 return true;
922 }
923
924 return false;
925}
926
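// IsIncrementNSW/IsIncrementNUW check whether the increment AR + Step
// provably cannot wrap, by comparing the result of extending before and after
// the addition in a type twice as wide.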
927static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
928 if (!isa<IntegerType>(AR->getType()))
929 return false;
930
931 unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
932 Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
933 const SCEV *Step = AR->getStepRecurrence(SE);
934 const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
935 SE.getSignExtendExpr(AR, WideTy));
936 const SCEV *ExtendAfterOp =
937 SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
938 return ExtendAfterOp == OpAfterExtend;
939}
940
941static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
942 if (!isa<IntegerType>(AR->getType()))
943 return false;
944
945 unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
946 Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
947 const SCEV *Step = AR->getStepRecurrence(SE);
948 const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
949 SE.getZeroExtendExpr(AR, WideTy));
950 const SCEV *ExtendAfterOp =
951 SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
952 return ExtendAfterOp == OpAfterExtend;
953}
954
955/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
956/// the base addrec, which is the addrec without any non-loop-dominating
957/// values, and return the PHI.
958PHINode *
959SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
960 const Loop *L, Type *&TruncTy,
961 bool &InvertStep) {
962 assert((!IVIncInsertLoop || IVIncInsertPos) &&
963 "Uninitialized insert position");
964
965 // Reuse a previously-inserted PHI, if present.
966 BasicBlock *LatchBlock = L->getLoopLatch();
967 if (LatchBlock) {
968 PHINode *AddRecPhiMatch = nullptr;
969 Instruction *IncV = nullptr;
970 TruncTy = nullptr;
971 InvertStep = false;
972
973 // Only try partially matching scevs that need truncation and/or
974 // step-inversion if we know this loop is outside the current loop.
975 bool TryNonMatchingSCEV =
976 IVIncInsertLoop &&
977 SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
978
979 for (PHINode &PN : L->getHeader()->phis()) {
980 if (!SE.isSCEVable(PN.getType()))
981 continue;
982
983 // We should not look for an incomplete PHI. Getting SCEV for an incomplete
984 // PHI has no meaning at all.
985 if (!PN.isComplete()) {
986 SCEV_DEBUG_WITH_TYPE(
987 DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
988 continue;
989 }
990
991 const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
992 if (!PhiSCEV)
993 continue;
994
995 bool IsMatchingSCEV = PhiSCEV == Normalized;
996 // We only handle truncation and inversion of phi recurrences for the
997 // expanded expression if the expanded expression's loop dominates the
998 // loop we insert to. Check now, so we can bail out early.
999 if (!IsMatchingSCEV && !TryNonMatchingSCEV)
1000 continue;
1001
1002 // TODO: this possibly can be reworked to avoid this cast at all.
1003 Instruction *TempIncV =
1004 dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
1005 if (!TempIncV)
1006 continue;
1007
1008 // Check whether we can reuse this PHI node.
1009 if (LSRMode) {
1010 if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
1011 continue;
1012 } else {
1013 if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
1014 continue;
1015 }
1016
1017 // Stop if we have found an exact match SCEV.
1018 if (IsMatchingSCEV) {
1019 IncV = TempIncV;
1020 TruncTy = nullptr;
1021 InvertStep = false;
1022 AddRecPhiMatch = &PN;
1023 break;
1024 }
1025
1026 // Try whether the phi can be translated into the requested form
1027 // (truncated and/or offset by a constant).
1028 if ((!TruncTy || InvertStep) &&
1029 canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
1030 // Record the phi node, but don't stop; we might find an exact match
1031 // later.
1032 AddRecPhiMatch = &PN;
1033 IncV = TempIncV;
1034 TruncTy = Normalized->getType();
1035 }
1036 }
1037
1038 if (AddRecPhiMatch) {
1039 // Ok, the add recurrence looks usable.
1040 // Remember this PHI, even in post-inc mode.
1041 InsertedValues.insert(AddRecPhiMatch);
1042 // Remember the increment.
1043 rememberInstruction(IncV);
1044 // Those values were not actually inserted but re-used.
1045 ReusedValues.insert(AddRecPhiMatch);
1046 ReusedValues.insert(IncV);
1047 return AddRecPhiMatch;
1048 }
1049 }
1050
1051 // Save the original insertion point so we can restore it when we're done.
1052 SCEVInsertPointGuard Guard(Builder, this);
1053
1054 // Another AddRec may need to be recursively expanded below. For example, if
1055 // this AddRec is quadratic, the StepV may itself be an AddRec in this
1056 // loop. Remove this loop from the PostIncLoops set before expanding such
1057 // AddRecs. Otherwise, we cannot find a valid position for the step
1058 // (i.e. StepV can never dominate its loop header). Ideally, we could do
1059 // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
1060 // so it's not worth implementing SmallPtrSet::swap.
1061 PostIncLoopSet SavedPostIncLoops = PostIncLoops;
1062 PostIncLoops.clear();
1063
1064 // Expand code for the start value into the loop preheader.
1065 assert(L->getLoopPreheader() &&
1066 "Can't expand add recurrences without a loop preheader!");
1067 Value *StartV =
1068 expand(Normalized->getStart(), L->getLoopPreheader()->getTerminator());
1069
1072 // StartV must have been inserted into L's preheader to dominate the new
1071 // phi.
1072 assert(!isa<Instruction>(StartV) ||
1073 SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
1074 L->getHeader()));
1075
1076 // Expand code for the step value. Do this before creating the PHI so that PHI
1077 // reuse code doesn't see an incomplete PHI.
1078 const SCEV *Step = Normalized->getStepRecurrence(SE);
1079 Type *ExpandTy = Normalized->getType();
1080 // If the stride is negative, insert a sub instead of an add for the increment
1081 // (unless it's a constant, because subtracts of constants are canonicalized
1082 // to adds).
1083 bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
1084 if (useSubtract)
1085 Step = SE.getNegativeSCEV(Step);
1086 // Expand the step somewhere that dominates the loop header.
1087 Value *StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
1088
1089 // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
1090 // we actually do emit an addition. It does not apply if we emit a
1091 // subtraction.
1092 bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
1093 bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);
1094
1095 // Create the PHI.
1096 BasicBlock *Header = L->getHeader();
1097 Builder.SetInsertPoint(Header, Header->begin());
1098 PHINode *PN =
1099 Builder.CreatePHI(ExpandTy, pred_size(Header), Twine(IVName) + ".iv");
1100
1101 // Create the step instructions and populate the PHI.
1102 for (BasicBlock *Pred : predecessors(Header)) {
1103 // Add a start value.
1104 if (!L->contains(Pred)) {
1105 PN->addIncoming(StartV, Pred);
1106 continue;
1107 }
1108
1109 // Create a step value and add it to the PHI.
1110 // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
1111 // instructions at IVIncInsertPos.
1112 Instruction *InsertPos = L == IVIncInsertLoop ?
1113 IVIncInsertPos : Pred->getTerminator();
1114 Builder.SetInsertPoint(InsertPos);
1115 Value *IncV = expandIVInc(PN, StepV, L, useSubtract);
1116
1117 if (isa<OverflowingBinaryOperator>(IncV)) {
1118 if (IncrementIsNUW)
1119 cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
1120 if (IncrementIsNSW)
1121 cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
1122 }
1123 PN->addIncoming(IncV, Pred);
1124 }
1125
1126 // After expanding subexpressions, restore the PostIncLoops set so the caller
1127 // can ensure that IVIncrement dominates the current uses.
1128 PostIncLoops = SavedPostIncLoops;
1129
1130 // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
1131 // effective when we are able to use an IV inserted here, so record it.
1132 InsertedValues.insert(PN);
1133 InsertedIVs.push_back(PN);
1134 return PN;
1135}
1136
1137Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
1138 const Loop *L = S->getLoop();
1139
1140 // Determine a normalized form of this expression, which is the expression
1141 // before any post-inc adjustment is made.
1142 const SCEVAddRecExpr *Normalized = S;
1143 if (PostIncLoops.count(L)) {
1144 PostIncLoopSet Loops;
1145 Loops.insert(L);
1146 Normalized = cast<SCEVAddRecExpr>(
1147 normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false));
1148 }
1149
1150 [[maybe_unused]] const SCEV *Start = Normalized->getStart();
1151 const SCEV *Step = Normalized->getStepRecurrence(SE);
1152 assert(SE.properlyDominates(Start, L->getHeader()) &&
1153 "Start does not properly dominate loop header");
1154 assert(SE.dominates(Step, L->getHeader()) && "Step not dominate loop header");
1155
1156 // In some cases, we decide to reuse an existing phi node but need to truncate
1157 // it and/or invert the step.
1158 Type *TruncTy = nullptr;
1159 bool InvertStep = false;
1160 PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep);
1161
1162 // Accommodate post-inc mode, if necessary.
1163 Value *Result;
1164 if (!PostIncLoops.count(L))
1165 Result = PN;
1166 else {
1167 // In PostInc mode, use the post-incremented value.
1168 BasicBlock *LatchBlock = L->getLoopLatch();
1169 assert(LatchBlock && "PostInc mode requires a unique loop latch!");
1170 Result = PN->getIncomingValueForBlock(LatchBlock);
1171
1172 // We might be introducing a new use of the post-inc IV that is not poison
1173 // safe, in which case we should drop poison generating flags. Only keep
1174 // those flags for which SCEV has proven that they always hold.
1175 if (isa<OverflowingBinaryOperator>(Result)) {
1176 auto *I = cast<Instruction>(Result);
1177 if (!S->hasNoUnsignedWrap())
1178 I->setHasNoUnsignedWrap(false);
1179 if (!S->hasNoSignedWrap())
1180 I->setHasNoSignedWrap(false);
1181 }
1182
1183 // For an expansion to use the postinc form, the client must call
1184 // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
1185 // or dominated by IVIncInsertPos.
1186 if (isa<Instruction>(Result) &&
1187 !SE.DT.dominates(cast<Instruction>(Result),
1188 &*Builder.GetInsertPoint())) {
1189 // The induction variable's postinc expansion does not dominate this use.
1190 // IVUsers tries to prevent this case, so it is rare. However, it can
1191 // happen when an IVUser outside the loop is not dominated by the latch
1192 // block. Adjusting IVIncInsertPos before expansion begins cannot handle
1193 // all cases. Consider a phi outside whose operand is replaced during
1194 // expansion with the value of the postinc user. Without fundamentally
1195 // changing the way postinc users are tracked, the only remedy is
1196 // inserting an extra IV increment. StepV might fold into PostLoopOffset,
1197 // but hopefully expandCodeFor handles that.
1198 bool useSubtract =
1199 !S->getType()->isPointerTy() && Step->isNonConstantNegative();
1200 if (useSubtract)
1201 Step = SE.getNegativeSCEV(Step);
1202 Value *StepV;
1203 {
1204 // Expand the step somewhere that dominates the loop header.
1205 SCEVInsertPointGuard Guard(Builder, this);
1206 StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
1207 }
1208 Result = expandIVInc(PN, StepV, L, useSubtract);
1209 }
1210 }
1211
1212 // We have decided to reuse an induction variable of a dominating loop. Apply
1213 // truncation and/or inversion of the step.
1214 if (TruncTy) {
1215 // Truncate the result.
1216 if (TruncTy != Result->getType())
1217 Result = Builder.CreateTrunc(Result, TruncTy);
1218
1219 // Invert the result.
1220 if (InvertStep)
1221 Result = Builder.CreateSub(expand(Normalized->getStart()), Result);
1222 }
1223
1224 return Result;
1225}
1226
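// If the addrec is expanded outside its loop and the loop has a single exit
// block that dominates the insertion point, try to reuse an LCSSA phi from
// that exit block, adjusted by an expanded offset, instead of expanding the
// addrec from scratch. Only simple (constant or unknown) offsets are used.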
1227Value *SCEVExpander::tryToReuseLCSSAPhi(const SCEVAddRecExpr *S) {
1228 Type *STy = S->getType();
1229 const Loop *L = S->getLoop();
1230 BasicBlock *EB = L->getExitBlock();
1231 if (!EB || !EB->getSinglePredecessor() ||
1232 !SE.DT.dominates(EB, Builder.GetInsertBlock()))
1233 return nullptr;
1234
1235 for (auto &PN : EB->phis()) {
1236 if (!SE.isSCEVable(PN.getType()))
1237 continue;
1238 auto *ExitSCEV = SE.getSCEV(&PN);
1239 if (!isa<SCEVAddRecExpr>(ExitSCEV))
1240 continue;
1241 Type *PhiTy = PN.getType();
1242 if (STy->isIntegerTy() && PhiTy->isPointerTy()) {
1243 ExitSCEV = SE.getPtrToIntExpr(ExitSCEV, STy);
1244 if (isa<SCEVCouldNotCompute>(ExitSCEV))
1245 continue;
1246 } else if (S->getType() != PN.getType()) {
1247 continue;
1248 }
1249
1250 // Check if we can re-use the existing PN, by adjusting it with an expanded
1251 // offset, if the offset is simpler.
1252 const SCEV *Diff = SE.getMinusSCEV(S, ExitSCEV);
1253 const SCEV *Op = Diff;
1257 if (!isa<SCEVConstant, SCEVUnknown>(Op))
1258 continue;
1259
1260 assert(Diff->getType()->isIntegerTy() &&
1261 "difference must be of integer type");
1262 Value *DiffV = expand(Diff);
1263 Value *BaseV = fixupLCSSAFormFor(&PN);
1264 if (PhiTy->isPointerTy()) {
1265 if (STy->isPointerTy())
1266 return Builder.CreatePtrAdd(BaseV, DiffV);
1267 BaseV = Builder.CreatePtrToInt(BaseV, DiffV->getType());
1268 }
1269 return Builder.CreateAdd(BaseV, DiffV);
1270 }
1271
1272 return nullptr;
1273}
1274
1275Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
1276 // In canonical mode we compute the addrec as an expression of a canonical IV
1277 // using evaluateAtIteration and expand the resulting SCEV expression. This
1278 // way we avoid introducing new IVs to carry on the computation of the addrec
1279 // throughout the loop.
1280 //
1281 // For nested addrecs evaluateAtIteration might need a canonical IV of a
1282 // type wider than the addrec itself. Emitting a canonical IV of the
1283 // proper type might produce non-legal types, for example expanding an i64
1284 // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
1285 // back to non-canonical mode for nested addrecs.
1286 if (!CanonicalMode || (S->getNumOperands() > 2))
1287 return expandAddRecExprLiterally(S);
1288
1289 Type *Ty = SE.getEffectiveSCEVType(S->getType());
1290 const Loop *L = S->getLoop();
1291
1292 // First check for an existing canonical IV in a suitable type.
1293 PHINode *CanonicalIV = nullptr;
1294 if (PHINode *PN = L->getCanonicalInductionVariable())
1295 if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
1296 CanonicalIV = PN;
1297
1298 // Rewrite an AddRec in terms of the canonical induction variable, if
1299 // its type is more narrow.
1300 if (CanonicalIV &&
1301 SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) &&
1302 !S->getType()->isPointerTy()) {
1303 SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
1304 for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
1305 NewOps[i] = SE.getAnyExtendExpr(S->getOperand(i), CanonicalIV->getType());
1306 Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
1307 S->getNoWrapFlags(SCEV::FlagNW)));
1308 BasicBlock::iterator NewInsertPt =
1309 findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint());
1310 V = expand(SE.getTruncateExpr(SE.getUnknown(V), Ty), NewInsertPt);
1311 return V;
1312 }
1313
1314 // If S is expanded outside the defining loop, check if there is a
1315 // matching LCSSA phi node for it.
1316 if (Value *V = tryToReuseLCSSAPhi(S))
1317 return V;
1318
1319 // {X,+,F} --> X + {0,+,F}
1320 if (!S->getStart()->isZero()) {
1321 if (isa<PointerType>(S->getType())) {
1322 Value *StartV = expand(SE.getPointerBase(S));
1323 return expandAddToGEP(SE.removePointerBase(S), StartV,
1324 S->getNoWrapFlags(SCEV::FlagNUW));
1325 }
1326
1327 SmallVector<const SCEV *, 4> NewOps(S->operands());
1328 NewOps[0] = SE.getConstant(Ty, 0);
1329 const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
1330 S->getNoWrapFlags(SCEV::FlagNW));
1331
1332 // Just do a normal add. Pre-expand the operands to suppress folding.
1333 //
1334 // The LHS and RHS values are factored out of the expand call to make the
1335 // output independent of the argument evaluation order.
1336 const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart()));
1337 const SCEV *AddExprRHS = SE.getUnknown(expand(Rest));
1338 return expand(SE.getAddExpr(AddExprLHS, AddExprRHS));
1339 }
1340
1341 // If we don't yet have a canonical IV, create one.
1342 if (!CanonicalIV) {
1343 // Create and insert the PHI node for the induction variable in the
1344 // specified loop.
1345 BasicBlock *Header = L->getHeader();
1346 pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
1347 CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar");
1348 CanonicalIV->insertBefore(Header->begin());
1349 rememberInstruction(CanonicalIV);
1350
1351 SmallSet<BasicBlock *, 4> PredSeen;
1352 Constant *One = ConstantInt::get(Ty, 1);
1353 for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
1354 BasicBlock *HP = *HPI;
1355 if (!PredSeen.insert(HP).second) {
1356 // There must be an incoming value for each predecessor, even the
1357 // duplicates!
1358 CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
1359 continue;
1360 }
1361
1362 if (L->contains(HP)) {
1363 // Insert a unit add instruction right before the terminator
1364 // corresponding to the back-edge.
1365 Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
1366 "indvar.next",
1367 HP->getTerminator()->getIterator());
1368 Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
1369 rememberInstruction(Add);
1370 CanonicalIV->addIncoming(Add, HP);
1371 } else {
1372 CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
1373 }
1374 }
1375 }
1376
1377 // {0,+,1} --> Insert a canonical induction variable into the loop!
1378 if (S->isAffine() && S->getOperand(1)->isOne()) {
1379 assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
1380 "IVs with types different from the canonical IV should "
1381 "already have been handled!");
1382 return CanonicalIV;
1383 }
1384
1385 // {0,+,F} --> {0,+,1} * F
1386
1387 // If this is a simple linear addrec, emit it now as a special case.
1388 if (S->isAffine()) // {0,+,F} --> i*F
1389 return
1390 expand(SE.getTruncateOrNoop(
1391 SE.getMulExpr(SE.getUnknown(CanonicalIV),
1392 SE.getNoopOrAnyExtend(S->getOperand(1),
1393 CanonicalIV->getType())),
1394 Ty));
1395
1396 // If this is a chain of recurrences, turn it into a closed form, using the
1397 // folders, then expandCodeFor the closed form. This allows the folders to
1398 // simplify the expression without having to build a bunch of special code
1399 // into this folder.
1400 const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
1401
1402 // Promote S up to the canonical IV type, if the cast is foldable.
1403 const SCEV *NewS = S;
1404 const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
1405 if (isa<SCEVAddRecExpr>(Ext))
1406 NewS = Ext;
1407
1408 const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
1409
1410 // Truncate the result down to the original type, if needed.
1411 const SCEV *T = SE.getTruncateOrNoop(V, Ty);
1412 return expand(T);
1413}
1414
1415Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
1416 Value *V = expand(S->getOperand());
1417 return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt,
1418 GetOptimalInsertionPointForCastOf(V));
1419}
1420
1421Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
1422 Value *V = expand(S->getOperand());
1423 return Builder.CreateTrunc(V, S->getType());
1424}
1425
1426Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
1427 Value *V = expand(S->getOperand());
1428 return Builder.CreateZExt(V, S->getType(), "",
1429 SE.isKnownNonNegative(S->getOperand()));
1430}
1431
1432Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
1433 Value *V = expand(S->getOperand());
1434 return Builder.CreateSExt(V, S->getType());
1435}
1436
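// Expand an n-ary min/max by folding the operands together from the last
// operand toward the first, using the matching llvm.smax/umax/smin/umin
// intrinsic for integer types and an icmp+select otherwise. For a sequential
// umin, every operand except the first is frozen so that poison cannot leak
// through the short-circuiting semantics.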
1437Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
1438 Intrinsic::ID IntrinID, Twine Name,
1439 bool IsSequential) {
1440 bool PrevSafeMode = SafeUDivMode;
1441 SafeUDivMode |= IsSequential;
1442 Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
1443 Type *Ty = LHS->getType();
1444 if (IsSequential)
1445 LHS = Builder.CreateFreeze(LHS);
1446 for (int i = S->getNumOperands() - 2; i >= 0; --i) {
1447 SafeUDivMode = (IsSequential && i != 0) || PrevSafeMode;
1448 Value *RHS = expand(S->getOperand(i));
1449 if (IsSequential && i != 0)
1450 RHS = Builder.CreateFreeze(RHS);
1451 Value *Sel;
1452 if (Ty->isIntegerTy())
1453 Sel = Builder.CreateIntrinsic(IntrinID, {Ty}, {LHS, RHS},
1454 /*FMFSource=*/nullptr, Name);
1455 else {
1456 Value *ICmp =
1457 Builder.CreateICmp(MinMaxIntrinsic::getPredicate(IntrinID), LHS, RHS);
1458 Sel = Builder.CreateSelect(ICmp, LHS, RHS, Name);
1459 }
1460 LHS = Sel;
1461 }
1462 SafeUDivMode = PrevSafeMode;
1463 return LHS;
1464}
1465
1466Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
1467 return expandMinMaxExpr(S, Intrinsic::smax, "smax");
1468}
1469
1470Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
1471 return expandMinMaxExpr(S, Intrinsic::umax, "umax");
1472}
1473
1474Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
1475 return expandMinMaxExpr(S, Intrinsic::smin, "smin");
1476}
1477
1478Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
1479 return expandMinMaxExpr(S, Intrinsic::umin, "umin");
1480}
1481
1482Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
1483 return expandMinMaxExpr(S, Intrinsic::umin, "umin", /*IsSequential*/true);
1484}
1485
1486Value *SCEVExpander::visitVScale(const SCEVVScale *S) {
1487 return Builder.CreateVScale(S->getType());
1488}
1489
1490Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
1491 BasicBlock::iterator IP) {
1492 setInsertPoint(IP);
1493 Value *V = expandCodeFor(SH, Ty);
1494 return V;
1495}
1496
1497Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
1498 // Expand the code for this SCEV.
1499 Value *V = expand(SH);
1500
1501 if (Ty && Ty != V->getType()) {
1502 assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
1503 "non-trivial casts should be done with the SCEVs directly!");
1504 V = InsertNoopCastOfTo(V, Ty);
1505 }
1506 return V;
1507}
1508
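// Search ScalarEvolution's ExprValueMap for an existing instruction that
// computes S and can be reused at InsertPt: it must have the same type,
// dominate InsertPt without breaking LCSSA, and be poison-safe to reuse
// (possibly after dropping flags on the instructions collected in
// DropPoisonGeneratingInsts).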
1509Value *SCEVExpander::FindValueInExprValueMap(
1510 const SCEV *S, const Instruction *InsertPt,
1511 SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
1512 // If the expansion is not in CanonicalMode, and the SCEV contains any
1513 // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
1514 if (!CanonicalMode && SE.containsAddRecurrence(S))
1515 return nullptr;
1516
1517 // If S is a constant or unknown, it may be worse to reuse an existing Value.
1518 if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
1519 return nullptr;
1520
1521 for (Value *V : SE.getSCEVValues(S)) {
1522 Instruction *EntInst = dyn_cast<Instruction>(V);
1523 if (!EntInst)
1524 continue;
1525
1526 // Choose a Value from the set which dominates the InsertPt.
1527 // InsertPt should be inside the Value's parent loop so as not to break
1528 // the LCSSA form.
1529 assert(EntInst->getFunction() == InsertPt->getFunction());
1530 if (S->getType() != V->getType() || !SE.DT.dominates(EntInst, InsertPt) ||
1531 !(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
1532 SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
1533 continue;
1534
1535 // Make sure reusing the instruction is poison-safe.
1536 if (SE.canReuseInstruction(S, EntInst, DropPoisonGeneratingInsts))
1537 return V;
1538 DropPoisonGeneratingInsts.clear();
1539 }
1540 return nullptr;
1541}
1542
1543// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
1544// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
1545// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
1546// literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
1547// the expansion will try to reuse Value from ExprValueMap, and only when it
1548// fails, expand the SCEV literally.
1549Value *SCEVExpander::expand(const SCEV *S) {
1550 // Compute an insertion point for this SCEV object. Hoist the instructions
1551 // as far out in the loop nest as possible.
1552 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
1553
1554  // We can only move the insertion point if there are no div or rem operations;
1555  // otherwise we risk moving it past a check for a zero denominator.
1556 auto SafeToHoist = [](const SCEV *S) {
1557 return !SCEVExprContains(S, [](const SCEV *S) {
1558 if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
1559 if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
1560 // Division by non-zero constants can be hoisted.
1561 return SC->getValue()->isZero();
1562 // All other divisions should not be moved as they may be
1563 // divisions by zero and should be kept within the
1564 // conditions of the surrounding loops that guard their
1565 // execution (see PR35406).
1566 return true;
1567 }
1568 return false;
1569 });
1570 };
1571 if (SafeToHoist(S)) {
1572 for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
1573 L = L->getParentLoop()) {
1574 if (SE.isLoopInvariant(S, L)) {
1575 if (!L) break;
1576 if (BasicBlock *Preheader = L->getLoopPreheader()) {
1577 InsertPt = Preheader->getTerminator()->getIterator();
1578 } else {
1579 // LSR sets the insertion point for AddRec start/step values to the
1580 // block start to simplify value reuse, even though it's an invalid
1581 // position. SCEVExpander must correct for this in all cases.
1582 InsertPt = L->getHeader()->getFirstInsertionPt();
1583 }
1584 } else {
1585 // If the SCEV is computable at this level, insert it into the header
1586 // after the PHIs (and after any other instructions that we've inserted
1587 // there) so that it is guaranteed to dominate any user inside the loop.
1588 if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
1589 InsertPt = L->getHeader()->getFirstInsertionPt();
1590
1591 while (InsertPt != Builder.GetInsertPoint() &&
1592 (isInsertedInstruction(&*InsertPt))) {
1593 InsertPt = std::next(InsertPt);
1594 }
1595 break;
1596 }
1597 }
1598 }
1599
1600 // Check to see if we already expanded this here.
1601 auto I = InsertedExpressions.find(std::make_pair(S, &*InsertPt));
1602 if (I != InsertedExpressions.end())
1603 return I->second;
1604
1605 SCEVInsertPointGuard Guard(Builder, this);
1606 Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
1607
1608 // Expand the expression into instructions.
1609 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1610 Value *V = FindValueInExprValueMap(S, &*InsertPt, DropPoisonGeneratingInsts);
1611 if (!V) {
1612 V = visit(S);
1613 V = fixupLCSSAFormFor(V);
1614 } else {
1615 for (Instruction *I : DropPoisonGeneratingInsts) {
1616 rememberFlags(I);
1617 I->dropPoisonGeneratingAnnotations();
1618 // See if we can re-infer from first principles any of the flags we just
1619 // dropped.
1620 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
1621 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
1622 auto *BO = cast<BinaryOperator>(I);
1623          BO->setHasNoUnsignedWrap(
1624              ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
1625          BO->setHasNoSignedWrap(
1626              ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
1627        }
1628 if (auto *NNI = dyn_cast<PossiblyNonNegInst>(I)) {
1629 auto *Src = NNI->getOperand(0);
1630        if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
1631                                    Constant::getNullValue(Src->getType()), I,
1632 DL).value_or(false))
1633 NNI->setNonNeg(true);
1634 }
1635 }
1636 }
1637 // Remember the expanded value for this SCEV at this location.
1638 //
1639 // This is independent of PostIncLoops. The mapped value simply materializes
1640 // the expression at this insertion point. If the mapped value happened to be
1641 // a postinc expansion, it could be reused by a non-postinc user, but only if
1642 // its insertion point was already at the head of the loop.
1643 InsertedExpressions[std::make_pair(S, &*InsertPt)] = V;
1644 return V;
1645}
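// Editorial illustration (not part of the upstream file): with the builder
// positioned inside a loop body, expanding a loop-invariant SCEV such as
// (4 + %n) lands in the preheader and is cached for reuse, roughly
//
//   preheader:
//     %n.off = add i64 %n, 4      ; hoisted expansion, remembered in
//     br label %loop              ; InsertedExpressions[{S, InsertPt}]
//
// whereas a udiv whose divisor is not a non-zero constant fails SafeToHoist
// above and stays at the original insertion point, under whatever
// zero-denominator guards dominate it.  The value names are hypothetical.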
1646
1647void SCEVExpander::rememberInstruction(Value *I) {
1648 auto DoInsert = [this](Value *V) {
1649 if (!PostIncLoops.empty())
1650 InsertedPostIncValues.insert(V);
1651 else
1652 InsertedValues.insert(V);
1653 };
1654 DoInsert(I);
1655}
1656
1657void SCEVExpander::rememberFlags(Instruction *I) {
1658 // If we already have flags for the instruction, keep the existing ones.
1659 OrigFlags.try_emplace(I, PoisonFlags(I));
1660}
1661
1662void SCEVExpander::replaceCongruentIVInc(
1663    PHINode *&Phi, PHINode *&OrigPhi, Loop *L, const DominatorTree *DT,
1664    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1665 BasicBlock *LatchBlock = L->getLoopLatch();
1666 if (!LatchBlock)
1667 return;
1668
1669 Instruction *OrigInc =
1670 dyn_cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1671 Instruction *IsomorphicInc =
1672 dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
1673 if (!OrigInc || !IsomorphicInc)
1674 return;
1675
1676 // If this phi has the same width but is more canonical, replace the
1677 // original with it. As part of the "more canonical" determination,
1678 // respect a prior decision to use an IV chain.
1679 if (OrigPhi->getType() == Phi->getType()) {
1680 bool Chained = ChainedPhis.contains(Phi);
1681 if (!(Chained || isExpandedAddRecExprPHI(OrigPhi, OrigInc, L)) &&
1682 (Chained || isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
1683 std::swap(OrigPhi, Phi);
1684 std::swap(OrigInc, IsomorphicInc);
1685 }
1686 }
1687
1688 // Replacing the congruent phi is sufficient because acyclic
1689 // redundancy elimination, CSE/GVN, should handle the
1690 // rest. However, once SCEV proves that a phi is congruent,
1691 // it's often the head of an IV user cycle that is isomorphic
1692 // with the original phi. It's worth eagerly cleaning up the
1693 // common case of a single IV increment so that DeleteDeadPHIs
1694 // can remove cycles that had postinc uses.
1695 // Because we may potentially introduce a new use of OrigIV that didn't
1696 // exist before at this point, its poison flags need readjustment.
1697 const SCEV *TruncExpr =
1698 SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType());
1699 if (OrigInc == IsomorphicInc || TruncExpr != SE.getSCEV(IsomorphicInc) ||
1700 !SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc))
1701 return;
1702
1703 bool BothHaveNUW = false;
1704 bool BothHaveNSW = false;
1705 auto *OBOIncV = dyn_cast<OverflowingBinaryOperator>(OrigInc);
1706 auto *OBOIsomorphic = dyn_cast<OverflowingBinaryOperator>(IsomorphicInc);
1707 if (OBOIncV && OBOIsomorphic) {
1708 BothHaveNUW =
1709 OBOIncV->hasNoUnsignedWrap() && OBOIsomorphic->hasNoUnsignedWrap();
1710 BothHaveNSW =
1711 OBOIncV->hasNoSignedWrap() && OBOIsomorphic->hasNoSignedWrap();
1712 }
1713
1714 if (!hoistIVInc(OrigInc, IsomorphicInc,
1715 /*RecomputePoisonFlags*/ true))
1716 return;
1717
1718 // We are replacing with a wider increment. If both OrigInc and IsomorphicInc
1719 // are NUW/NSW, then we can preserve them on the wider increment; the narrower
1720 // IsomorphicInc would wrap before the wider OrigInc, so the replacement won't
1721 // make IsomorphicInc's uses more poisonous.
1722 assert(OrigInc->getType()->getScalarSizeInBits() >=
1723 IsomorphicInc->getType()->getScalarSizeInBits() &&
1724 "Should only replace an increment with a wider one.");
1725 if (BothHaveNUW || BothHaveNSW) {
1726 OrigInc->setHasNoUnsignedWrap(OBOIncV->hasNoUnsignedWrap() || BothHaveNUW);
1727 OrigInc->setHasNoSignedWrap(OBOIncV->hasNoSignedWrap() || BothHaveNSW);
1728 }
1729
1730 SCEV_DEBUG_WITH_TYPE(DebugType,
1731 dbgs() << "INDVARS: Eliminated congruent iv.inc: "
1732 << *IsomorphicInc << '\n');
1733 Value *NewInc = OrigInc;
1734 if (OrigInc->getType() != IsomorphicInc->getType()) {
1735    BasicBlock::iterator IP;
1736    if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
1737 IP = PN->getParent()->getFirstInsertionPt();
1738 else
1739 IP = OrigInc->getNextNode()->getIterator();
1740
1741 IRBuilder<> Builder(IP->getParent(), IP);
1742 Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
1743 NewInc =
1744 Builder.CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName);
1745 }
1746 IsomorphicInc->replaceAllUsesWith(NewInc);
1747 DeadInsts.emplace_back(IsomorphicInc);
1748}
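// Editorial illustration (not part of the upstream file): two single-increment
// IVs that SCEV proves congruent, e.g.
//
//   %i = phi i64 [ 0, %ph ], [ %i.next, %latch ]   ; {0,+,1}
//   %j = phi i64 [ 0, %ph ], [ %j.next, %latch ]   ; {0,+,1}
//   %i.next = add nuw nsw i64 %i, 1
//   %j.next = add i64 %j, 1
//
// are cleaned up here by redirecting %j.next's uses to %i.next (via a trunc or
// bitcast if the types differ), after which DeleteDeadPHIs can remove the %j
// cycle.  Block and value names are hypothetical.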
1749
1750/// replaceCongruentIVs - Check for congruent phis in this loop header and
1751/// replace them with their most canonical representative. Return the number of
1752/// phis eliminated.
1753///
1754/// This does not depend on any SCEVExpander state but should be used in
1755/// the same context that SCEVExpander is used.
1756unsigned
1757SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
1758                                  SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1759                                  const TargetTransformInfo *TTI) {
1760 // Find integer phis in order of increasing width.
1761  SmallVector<PHINode *, 8> Phis(
1762      llvm::make_pointer_range(L->getHeader()->phis()));
1763
1764 if (TTI)
1765 // Use stable_sort to preserve order of equivalent PHIs, so the order
1766 // of the sorted Phis is the same from run to run on the same loop.
1767 llvm::stable_sort(Phis, [](Value *LHS, Value *RHS) {
1768 // Put pointers at the back and make sure pointer < pointer = false.
1769 if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
1770 return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
1771        return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() <
1772               LHS->getType()->getPrimitiveSizeInBits().getFixedValue();
1773      });
1774
1775 unsigned NumElim = 0;
1777 // Process phis from wide to narrow. Map wide phis to their truncation
1778 // so narrow phis can reuse them.
1779 for (PHINode *Phi : Phis) {
1780 auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
1781 if (Value *V = simplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
1782 return V;
1783 if (!SE.isSCEVable(PN->getType()))
1784 return nullptr;
1785 auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
1786 if (!Const)
1787 return nullptr;
1788 return Const->getValue();
1789 };
1790
1791 // Fold constant phis. They may be congruent to other constant phis and
1792 // would confuse the logic below that expects proper IVs.
1793 if (Value *V = SimplifyPHINode(Phi)) {
1794 if (V->getType() != Phi->getType())
1795 continue;
1796 SE.forgetValue(Phi);
1797 Phi->replaceAllUsesWith(V);
1798 DeadInsts.emplace_back(Phi);
1799 ++NumElim;
1800 SCEV_DEBUG_WITH_TYPE(DebugType,
1801 dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
1802 << '\n');
1803 continue;
1804 }
1805
1806 if (!SE.isSCEVable(Phi->getType()))
1807 continue;
1808
1809 PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
1810 if (!OrigPhiRef) {
1811 OrigPhiRef = Phi;
1812 if (Phi->getType()->isIntegerTy() && TTI &&
1813 TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
1814 // Make sure we only rewrite using simple induction variables;
1815 // otherwise, we can make the trip count of a loop unanalyzable
1816 // to SCEV.
1817 const SCEV *PhiExpr = SE.getSCEV(Phi);
1818 if (isa<SCEVAddRecExpr>(PhiExpr)) {
1819 // This phi can be freely truncated to the narrowest phi type. Map the
1820 // truncated expression to it so it will be reused for narrow types.
1821 const SCEV *TruncExpr =
1822 SE.getTruncateExpr(PhiExpr, Phis.back()->getType());
1823 ExprToIVMap[TruncExpr] = Phi;
1824 }
1825 }
1826 continue;
1827 }
1828
1829 // Replacing a pointer phi with an integer phi or vice-versa doesn't make
1830 // sense.
1831 if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
1832 continue;
1833
1834 replaceCongruentIVInc(Phi, OrigPhiRef, L, DT, DeadInsts);
1835 SCEV_DEBUG_WITH_TYPE(DebugType,
1836 dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
1837 << '\n');
1838    SCEV_DEBUG_WITH_TYPE(
1839        DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n');
1840 ++NumElim;
1841 Value *NewIV = OrigPhiRef;
1842 if (OrigPhiRef->getType() != Phi->getType()) {
1843 IRBuilder<> Builder(L->getHeader(),
1844 L->getHeader()->getFirstInsertionPt());
1845 Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
1846 NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
1847 }
1848 Phi->replaceAllUsesWith(NewIV);
1849 DeadInsts.emplace_back(Phi);
1850 }
1851 return NumElim;
1852}
1853
1854bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S,
1855                                               const Instruction *At,
1856 Loop *L) {
1857 using namespace llvm::PatternMatch;
1858
1859 SmallVector<BasicBlock *, 4> ExitingBlocks;
1860 L->getExitingBlocks(ExitingBlocks);
1861
1862 // Look for suitable value in simple conditions at the loop exits.
1863 for (BasicBlock *BB : ExitingBlocks) {
1864 CmpPredicate Pred;
1865 Instruction *LHS, *RHS;
1866
1867 if (!match(BB->getTerminator(),
1868               m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
1869                    m_BasicBlock(), m_BasicBlock())))
1870      continue;
1871
1872 if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
1873 return true;
1874
1875 if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
1876 return true;
1877 }
1878
1879 // Use expand's logic which is used for reusing a previous Value in
1880 // ExprValueMap. Note that we don't currently model the cost of
1881 // needing to drop poison generating flags on the instruction if we
1882 // want to reuse it. We effectively assume that has zero cost.
1883 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1884 return FindValueInExprValueMap(S, At, DropPoisonGeneratingInsts) != nullptr;
1885}
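// Editorial illustration (not part of the upstream file): for a loop whose
// exiting block ends in
//
//   %cmp = icmp eq i64 %iv.next, %n
//   br i1 %cmp, label %exit, label %loop
//
// a query for the SCEV of %n (or of %iv.next) at a point dominated by those
// instructions reports a related existing expansion, so the cost model treats
// materializing it as free.  Value and block names are hypothetical.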
1886
1887template<typename T> static InstructionCost costAndCollectOperands(
1888    const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
1889    TargetTransformInfo::TargetCostKind CostKind,
1890    SmallVectorImpl<SCEVOperand> &Worklist) {
1891
1892 const T *S = cast<T>(WorkItem.S);
1893  InstructionCost Cost = 0;
1894  // Object to help map SCEV operands to expanded IR instructions.
1895 struct OperationIndices {
1896 OperationIndices(unsigned Opc, size_t min, size_t max) :
1897 Opcode(Opc), MinIdx(min), MaxIdx(max) { }
1898 unsigned Opcode;
1899 size_t MinIdx;
1900 size_t MaxIdx;
1901 };
1902
1903 // Collect the operations of all the instructions that will be needed to
1904 // expand the SCEVExpr. This is so that when we come to cost the operands,
1905 // we know what the generated user(s) will be.
1906  SmallVector<OperationIndices, 2> Operations;
1907
1908 auto CastCost = [&](unsigned Opcode) -> InstructionCost {
1909 Operations.emplace_back(Opcode, 0, 0);
1910 return TTI.getCastInstrCost(Opcode, S->getType(),
1911 S->getOperand(0)->getType(),
1912                                TTI::CastContextHint::None, CostKind);
1913  };
1914
1915 auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
1916 unsigned MinIdx = 0,
1917 unsigned MaxIdx = 1) -> InstructionCost {
1918 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1919 return NumRequired *
1920 TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind);
1921 };
1922
1923 auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
1924 unsigned MaxIdx) -> InstructionCost {
1925 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1926 Type *OpType = S->getType();
1927 return NumRequired * TTI.getCmpSelInstrCost(
1928 Opcode, OpType, CmpInst::makeCmpResultType(OpType),
1929                             CmpInst::BAD_ICMP_PREDICATE, CostKind);
1930  };
1931
1932 switch (S->getSCEVType()) {
1933 case scCouldNotCompute:
1934 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1935 case scUnknown:
1936 case scConstant:
1937 case scVScale:
1938 return 0;
1939 case scPtrToInt:
1940 Cost = CastCost(Instruction::PtrToInt);
1941 break;
1942 case scTruncate:
1943 Cost = CastCost(Instruction::Trunc);
1944 break;
1945 case scZeroExtend:
1946 Cost = CastCost(Instruction::ZExt);
1947 break;
1948 case scSignExtend:
1949 Cost = CastCost(Instruction::SExt);
1950 break;
1951 case scUDivExpr: {
1952 unsigned Opcode = Instruction::UDiv;
1953 if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1954 if (SC->getAPInt().isPowerOf2())
1955 Opcode = Instruction::LShr;
1956 Cost = ArithCost(Opcode, 1);
1957 break;
1958 }
1959 case scAddExpr:
1960 Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
1961 break;
1962 case scMulExpr:
1963 // TODO: this is a very pessimistic cost modelling for Mul,
1964  // because of the Bin Pow algorithm actually used by the expander,
1965 // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
1966 Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
1967 break;
1968 case scSMaxExpr:
1969 case scUMaxExpr:
1970 case scSMinExpr:
1971 case scUMinExpr:
1972 case scSequentialUMinExpr: {
1973 // FIXME: should this ask the cost for Intrinsic's?
1974 // The reduction tree.
1975 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
1976 Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
1977 switch (S->getSCEVType()) {
1978 case scSequentialUMinExpr: {
1979 // The safety net against poison.
1980 // FIXME: this is broken.
1981 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0);
1982 Cost += ArithCost(Instruction::Or,
1983 S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0);
1984 Cost += CmpSelCost(Instruction::Select, 1, 0, 1);
1985 break;
1986 }
1987 default:
1988 assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
1989 "Unhandled SCEV expression type?");
1990 break;
1991 }
1992 break;
1993 }
1994 case scAddRecExpr: {
1995 // Addrec expands to a phi and add per recurrence.
1996 unsigned NumRecurrences = S->getNumOperands() - 1;
1997 Cost += TTI.getCFInstrCost(Instruction::PHI, CostKind) * NumRecurrences;
1998 Cost +=
1999 TTI.getArithmeticInstrCost(Instruction::Add, S->getType(), CostKind) *
2000 NumRecurrences;
2001 // AR start is used in phi.
2002 Worklist.emplace_back(Instruction::PHI, 0, S->getOperand(0));
2003 // Other operands are used in add.
2004 for (const SCEV *Op : S->operands().drop_front())
2005 Worklist.emplace_back(Instruction::Add, 1, Op);
2006 break;
2007 }
2008 }
2009
2010 for (auto &CostOp : Operations) {
2011 for (auto SCEVOp : enumerate(S->operands())) {
2012 // Clamp the index to account for multiple IR operations being chained.
2013 size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
2014 size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
2015 Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
2016 }
2017 }
2018 return Cost;
2019}
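// Editorial worked example (not part of the upstream file): an add expression
// with four operands is charged 3 * getArithmeticInstrCost(Add) above, and a
// four-operand smax reduction is charged 3 icmps plus 3 selects via CmpSelCost.
// Each SCEV operand is then queued on the worklist together with the opcode
// and (clamped) operand index of the IR instruction that will consume it.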
2020
2021bool SCEVExpander::isHighCostExpansionHelper(
2022 const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
2023 InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
2024    SmallPtrSetImpl<const SCEV *> &Processed,
2025    SmallVectorImpl<SCEVOperand> &Worklist) {
2026 if (Cost > Budget)
2027 return true; // Already run out of budget, give up.
2028
2029 const SCEV *S = WorkItem.S;
2030 // Was the cost of expansion of this expression already accounted for?
2031 if (!isa<SCEVConstant>(S) && !Processed.insert(S).second)
2032 return false; // We have already accounted for this expression.
2033
2034 // If we can find an existing value for this scev available at the point "At"
2035 // then consider the expression cheap.
2036 if (hasRelatedExistingExpansion(S, &At, L))
2037 return false; // Consider the expression to be free.
2038
2040 L->getHeader()->getParent()->hasMinSize()
2043
2044 switch (S->getSCEVType()) {
2045 case scCouldNotCompute:
2046 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
2047 case scUnknown:
2048 case scVScale:
2049 // Assume to be zero-cost.
2050 return false;
2051 case scConstant: {
2052    // Only evaluate the costs of constants when optimizing for size.
2053    if (CostKind != TargetTransformInfo::TCK_CodeSize)
2054      return false;
2055 const APInt &Imm = cast<SCEVConstant>(S)->getAPInt();
2056 Type *Ty = S->getType();
2057    Cost += TTI.getIntImmCostInst(
2058        WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind);
2059 return Cost > Budget;
2060 }
2061 case scTruncate:
2062 case scPtrToInt:
2063 case scZeroExtend:
2064 case scSignExtend: {
2065 Cost +=
2066 costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
2067 return false; // Will answer upon next entry into this function.
2068 }
2069 case scUDivExpr: {
2070 // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
2071 // HowManyLessThans produced to compute a precise expression, rather than a
2072 // UDiv from the user's code. If we can't find a UDiv in the code with some
2073    // simple searching, we need to account for its cost.
2074
2075 // At the beginning of this function we already tried to find existing
2076 // value for plain 'S'. Now try to lookup 'S + 1' since it is common
2077 // pattern involving division. This is just a simple search heuristic.
2078    if (hasRelatedExistingExpansion(
2079            SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
2080 return false; // Consider it to be free.
2081
2082 Cost +=
2083 costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
2084 return false; // Will answer upon next entry into this function.
2085 }
2086 case scAddExpr:
2087 case scMulExpr:
2088 case scUMaxExpr:
2089 case scSMaxExpr:
2090 case scUMinExpr:
2091 case scSMinExpr:
2092 case scSequentialUMinExpr: {
2093 assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
2094 "Nary expr should have more than 1 operand.");
2095 // The simple nary expr will require one less op (or pair of ops)
2096    // than the number of its terms.
2097 Cost +=
2098 costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
2099 return Cost > Budget;
2100 }
2101 case scAddRecExpr: {
2102 assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
2103 "Polynomial should be at least linear");
2104 Cost += costAndCollectOperands<SCEVAddRecExpr>(
2105 WorkItem, TTI, CostKind, Worklist);
2106 return Cost > Budget;
2107 }
2108 }
2109 llvm_unreachable("Unknown SCEV kind!");
2110}
2111
2112Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
2113                                            Instruction *IP) {
2114 assert(IP);
2115 switch (Pred->getKind()) {
2116  case SCEVPredicate::P_Union:
2117    return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
2118  case SCEVPredicate::P_Compare:
2119    return expandComparePredicate(cast<SCEVComparePredicate>(Pred), IP);
2120 case SCEVPredicate::P_Wrap: {
2121 auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
2122 return expandWrapPredicate(AddRecPred, IP);
2123 }
2124 }
2125 llvm_unreachable("Unknown SCEV predicate type");
2126}
2127
2128Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
2129                                            Instruction *IP) {
2130 Value *Expr0 = expand(Pred->getLHS(), IP);
2131 Value *Expr1 = expand(Pred->getRHS(), IP);
2132
2133 Builder.SetInsertPoint(IP);
2134 auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate());
2135 auto *I = Builder.CreateICmp(InvPred, Expr0, Expr1, "ident.check");
2136 return I;
2137}
2138
2139Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
2140                                           Instruction *Loc, bool Signed) {
2141 assert(AR->isAffine() && "Cannot generate RT check for "
2142 "non-affine expression");
2143
2144 // FIXME: It is highly suspicious that we're ignoring the predicates here.
2145  SmallVector<const SCEVPredicate *, 4> Pred;
2146  const SCEV *ExitCount =
2147      SE.getPredicatedSymbolicMaxBackedgeTakenCount(AR->getLoop(), Pred);
2148
2149 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
2150
2151 const SCEV *Step = AR->getStepRecurrence(SE);
2152 const SCEV *Start = AR->getStart();
2153
2154 Type *ARTy = AR->getType();
2155 unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
2156 unsigned DstBits = SE.getTypeSizeInBits(ARTy);
2157
2158 // The expression {Start,+,Step} has nusw/nssw if
2159 // Step < 0, Start - |Step| * Backedge <= Start
2160 // Step >= 0, Start + |Step| * Backedge > Start
2161 // and |Step| * Backedge doesn't unsigned overflow.
2162
2163 Builder.SetInsertPoint(Loc);
2164 Value *TripCountVal = expand(ExitCount, Loc);
2165
2166 IntegerType *Ty =
2167      IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
2168
2169 Value *StepValue = expand(Step, Loc);
2170 Value *NegStepValue = expand(SE.getNegativeSCEV(Step), Loc);
2171 Value *StartValue = expand(Start, Loc);
2172
2173 ConstantInt *Zero =
2174 ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits));
2175
2176 Builder.SetInsertPoint(Loc);
2177 // Compute |Step|
2178 Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
2179 Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);
2180
2181 // Compute |Step| * Backedge
2182 // Compute:
2183 // 1. Start + |Step| * Backedge < Start
2184 // 2. Start - |Step| * Backedge > Start
2185 //
2186 // And select either 1. or 2. depending on whether step is positive or
2187 // negative. If Step is known to be positive or negative, only create
2188 // either 1. or 2.
2189 auto ComputeEndCheck = [&]() -> Value * {
2190 // Checking <u 0 is always false.
2191 if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
2192 return ConstantInt::getFalse(Loc->getContext());
2193
2194    // Get the backedge taken count and truncate or extend it to the AR type.
2195 Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
2196
2197 Value *MulV, *OfMul;
2198 if (Step->isOne()) {
2199 // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
2200 // needed, there is never an overflow, so to avoid artificially inflating
2201 // the cost of the check, directly emit the optimized IR.
2202 MulV = TruncTripCount;
2203 OfMul = ConstantInt::getFalse(MulV->getContext());
2204 } else {
2205 CallInst *Mul = Builder.CreateIntrinsic(Intrinsic::umul_with_overflow, Ty,
2206 {AbsStep, TruncTripCount},
2207 /*FMFSource=*/nullptr, "mul");
2208 MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
2209 OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
2210 }
2211
2212 Value *Add = nullptr, *Sub = nullptr;
2213 bool NeedPosCheck = !SE.isKnownNegative(Step);
2214 bool NeedNegCheck = !SE.isKnownPositive(Step);
2215
2216 if (isa<PointerType>(ARTy)) {
2217 Value *NegMulV = Builder.CreateNeg(MulV);
2218 if (NeedPosCheck)
2219 Add = Builder.CreatePtrAdd(StartValue, MulV);
2220 if (NeedNegCheck)
2221 Sub = Builder.CreatePtrAdd(StartValue, NegMulV);
2222 } else {
2223 if (NeedPosCheck)
2224 Add = Builder.CreateAdd(StartValue, MulV);
2225 if (NeedNegCheck)
2226 Sub = Builder.CreateSub(StartValue, MulV);
2227 }
2228
2229 Value *EndCompareLT = nullptr;
2230 Value *EndCompareGT = nullptr;
2231 Value *EndCheck = nullptr;
2232 if (NeedPosCheck)
2233      EndCheck = EndCompareLT = Builder.CreateICmp(
2234          Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
2235    if (NeedNegCheck)
2236      EndCheck = EndCompareGT = Builder.CreateICmp(
2237          Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
2238 if (NeedPosCheck && NeedNegCheck) {
2239 // Select the answer based on the sign of Step.
2240 EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
2241 }
2242 return Builder.CreateOr(EndCheck, OfMul);
2243 };
2244 Value *EndCheck = ComputeEndCheck();
2245
2246 // If the backedge taken count type is larger than the AR type,
2247 // check that we don't drop any bits by truncating it. If we are
2248 // dropping bits, then we have overflow (unless the step is zero).
2249 if (SrcBits > DstBits) {
2250 auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
2251 auto *BackedgeCheck =
2252 Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
2253 ConstantInt::get(Loc->getContext(), MaxVal));
2254 BackedgeCheck = Builder.CreateAnd(
2255 BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero));
2256
2257 EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
2258 }
2259
2260 return EndCheck;
2261}
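// Editorial worked example (not part of the upstream file): for {%start,+,1}
// with backedge-taken count %btc and Signed == false, Step->isOne() takes the
// cheap path above and the emitted check is roughly
//
//   %end  = add i64 %start, %btc
//   %ovfl = icmp ult i64 %end, %start
//
// which is true exactly when the unsigned add wraps; no umul.with.overflow
// call is materialized.  Value names are hypothetical.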
2262
2263Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
2264                                         Instruction *IP) {
2265 const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr());
2266 Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;
2267
2268 // Add a check for NUSW
2269  if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
2270    NUSWCheck = generateOverflowCheck(A, IP, false);
2271
2272 // Add a check for NSSW
2273  if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
2274    NSSWCheck = generateOverflowCheck(A, IP, true);
2275
2276 if (NUSWCheck && NSSWCheck)
2277 return Builder.CreateOr(NUSWCheck, NSSWCheck);
2278
2279 if (NUSWCheck)
2280 return NUSWCheck;
2281
2282 if (NSSWCheck)
2283 return NSSWCheck;
2284
2285 return ConstantInt::getFalse(IP->getContext());
2286}
2287
2288Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
2289                                          Instruction *IP) {
2290 // Loop over all checks in this set.
2291 SmallVector<Value *> Checks;
2292 for (const auto *Pred : Union->getPredicates()) {
2293 Checks.push_back(expandCodeForPredicate(Pred, IP));
2294 Builder.SetInsertPoint(IP);
2295 }
2296
2297 if (Checks.empty())
2298 return ConstantInt::getFalse(IP->getContext());
2299 return Builder.CreateOr(Checks);
2300}
2301
2302Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
2303 auto *DefI = dyn_cast<Instruction>(V);
2304 if (!PreserveLCSSA || !DefI)
2305 return V;
2306
2307 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
2308 Loop *DefLoop = SE.LI.getLoopFor(DefI->getParent());
2309 Loop *UseLoop = SE.LI.getLoopFor(InsertPt->getParent());
2310 if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop))
2311 return V;
2312
2313  // Create a temporary instruction at the current insertion point, so we
2314 // can hand it off to the helper to create LCSSA PHIs if required for the
2315 // new use.
2316 // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
2317  // would accept an insertion point and return an LCSSA phi for that
2318 // insertion point, so there is no need to insert & remove the temporary
2319 // instruction.
2320 Type *ToTy;
2321 if (DefI->getType()->isIntegerTy())
2322 ToTy = PointerType::get(DefI->getContext(), 0);
2323 else
2324 ToTy = Type::getInt32Ty(DefI->getContext());
2325 Instruction *User =
2326 CastInst::CreateBitOrPointerCast(DefI, ToTy, "tmp.lcssa.user", InsertPt);
2327 auto RemoveUserOnExit =
2328 make_scope_exit([User]() { User->eraseFromParent(); });
2329
2330  SmallVector<Instruction *, 1> ToUpdate;
2331  ToUpdate.push_back(DefI);
2332 SmallVector<PHINode *, 16> PHIsToRemove;
2333 SmallVector<PHINode *, 16> InsertedPHIs;
2334 formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, &PHIsToRemove,
2335 &InsertedPHIs);
2336 for (PHINode *PN : InsertedPHIs)
2337 rememberInstruction(PN);
2338 for (PHINode *PN : PHIsToRemove) {
2339 if (!PN->use_empty())
2340 continue;
2341 InsertedValues.erase(PN);
2342 InsertedPostIncValues.erase(PN);
2343 PN->eraseFromParent();
2344 }
2345
2346 return User->getOperand(0);
2347}
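// Editorial illustration (not part of the upstream file): if the expanded
// value %v is defined in an inner loop but the current insertion point lies
// outside it, the helper above inserts an LCSSA phi such as
//
//   inner.exit:
//     %v.lcssa = phi i64 [ %v, %inner.latch ]
//
// and returns %v.lcssa (read back off the throw-away cast) instead of %v.
// Block and value names are hypothetical.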
2348
2349namespace {
2350// Search for a SCEV subexpression that is not safe to expand. Any expression
2351// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
2352// UDiv expressions. We don't know if the UDiv is derived from an IR divide
2353// instruction, but the important thing is that we prove the denominator is
2354// nonzero before expansion.
2355//
2356// IVUsers already checks that IV-derived expressions are safe. So this check is
2357// only needed when the expression includes some subexpression that is not IV
2358// derived.
2359//
2360// Currently, we only allow division by a value provably non-zero here.
2361//
2362// We cannot generally expand recurrences unless the step dominates the loop
2363// header. The expander handles the special case of affine recurrences by
2364// scaling the recurrence outside the loop, but this technique isn't generally
2365// applicable. Expanding a nested recurrence outside a loop requires computing
2366// binomial coefficients. This could be done, but the recurrence has to be in a
2367// perfectly reduced form, which can't be guaranteed.
2368struct SCEVFindUnsafe {
2369 ScalarEvolution &SE;
2370 bool CanonicalMode;
2371 bool IsUnsafe = false;
2372
2373 SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
2374 : SE(SE), CanonicalMode(CanonicalMode) {}
2375
2376 bool follow(const SCEV *S) {
2377 if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
2378 if (!SE.isKnownNonZero(D->getRHS())) {
2379 IsUnsafe = true;
2380 return false;
2381 }
2382 }
2383 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
2384 // For non-affine addrecs or in non-canonical mode we need a preheader
2385 // to insert into.
2386 if (!AR->getLoop()->getLoopPreheader() &&
2387 (!CanonicalMode || !AR->isAffine())) {
2388 IsUnsafe = true;
2389 return false;
2390 }
2391 }
2392 return true;
2393 }
2394 bool isDone() const { return IsUnsafe; }
2395};
2396} // namespace
2397
2398bool SCEVExpander::isSafeToExpand(const SCEV *S) const {
2399  SCEVFindUnsafe Search(SE, CanonicalMode);
2400 visitAll(S, Search);
2401 return !Search.IsUnsafe;
2402}
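// Editorial illustration (not part of the upstream file): a SCEV such as
// (%a /u %b) with a non-constant, possibly-zero %b is rejected by
// SCEVFindUnsafe above, because expanding it could introduce a udiv that
// executes unconditionally; an addrec on a loop with no preheader is likewise
// rejected unless the expander is in canonical mode and the recurrence is
// affine.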
2403
2404bool SCEVExpander::isSafeToExpandAt(const SCEV *S,
2405                                    const Instruction *InsertionPoint) const {
2406 if (!isSafeToExpand(S))
2407 return false;
2408 // We have to prove that the expanded site of S dominates InsertionPoint.
2409 // This is easy when not in the same block, but hard when S is an instruction
2410 // to be expanded somewhere inside the same block as our insertion point.
2411 // What we really need here is something analogous to an OrderedBasicBlock,
2412 // but for the moment, we paper over the problem by handling two common and
2413 // cheap to check cases.
2414 if (SE.properlyDominates(S, InsertionPoint->getParent()))
2415 return true;
2416 if (SE.dominates(S, InsertionPoint->getParent())) {
2417 if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
2418 return true;
2419 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
2420 if (llvm::is_contained(InsertionPoint->operand_values(), U->getValue()))
2421 return true;
2422 }
2423 return false;
2424}
2425
2426SCEVExpanderCleaner::~SCEVExpanderCleaner() {
2427  // Result is used, nothing to remove.
2428 if (ResultUsed)
2429 return;
2430
2431 // Restore original poison flags.
2432 for (auto [I, Flags] : Expander.OrigFlags)
2433 Flags.apply(I);
2434
2435 auto InsertedInstructions = Expander.getAllInsertedInstructions();
2436#ifndef NDEBUG
2437  SmallPtrSet<Instruction *, 8> InsertedSet(llvm::from_range,
2438                                            InsertedInstructions);
2439 (void)InsertedSet;
2440#endif
2441 // Remove sets with value handles.
2442 Expander.clear();
2443
2444 // Remove all inserted instructions.
2445 for (Instruction *I : reverse(InsertedInstructions)) {
2446#ifndef NDEBUG
2447 assert(all_of(I->users(),
2448 [&InsertedSet](Value *U) {
2449 return InsertedSet.contains(cast<Instruction>(U));
2450 }) &&
2451 "removed instruction should only be used by instructions inserted "
2452 "during expansion");
2453#endif
2454 assert(!I->getType()->isVoidTy() &&
2455 "inserted instruction should have non-void types");
2456 I->replaceAllUsesWith(PoisonValue::get(I->getType()));
2457 I->eraseFromParent();
2458 }
2459}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Common GEP
Hexagon Hardware Loops
#define I(x, y, z)
Definition: MD5.cpp:58
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
#define P(N)
This file contains some templates that are useful if you are working with the STL at all.
static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR)
static const Loop * PickMostRelevantLoop(const Loop *A, const Loop *B, DominatorTree &DT)
PickMostRelevantLoop - Given two loops pick the one that's most relevant for SCEV expansion.
static InstructionCost costAndCollectOperands(const SCEVOperand &WorkItem, const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, SmallVectorImpl< SCEVOperand > &Worklist)
static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR)
static bool canBeCheaplyTransformed(ScalarEvolution &SE, const SCEVAddRecExpr *Phi, const SCEVAddRecExpr *Requested, bool &InvertStep)
Check whether we can cheaply express the requested SCEV in terms of the available PHI SCEV by truncat...
#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:1012
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:206
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
This class represents an incoming formal argument to a Function.
Definition: Argument.h:32
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:393
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:437
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:448
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:612
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:984
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:707
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:701
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:705
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:703
@ ICMP_NE
not equal
Definition: InstrTypes.h:700
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:706
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:791
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:23
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1120
static LLVM_ABI Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:2213
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:875
This is an important base class in LLVM.
Definition: Constant.h:43
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
This class represents an Operation in the Expression.
bool isNonIntegralPointerType(PointerType *PT) const
Definition: DataLayout.h:375
A debug info location.
Definition: DebugLoc.h:124
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:135
const BasicBlock & getEntryBlock() const
Definition: Function.h:807
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags noUnsignedWrap()
static GEPNoWrapFlags none()
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2100
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2618
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1005
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:202
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2094
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2637
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:2036
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition: IRBuilder.h:2238
Value * CreateVScale(Type *Ty, const Twine &Name="")
Create a call to llvm.vscale.<Ty>().
Definition: IRBuilder.h:958
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:201
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:247
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1781
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:834
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2494
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Definition: IRBuilder.h:172
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1420
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2082
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1551
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1403
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2194
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2068
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:207
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2230
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2439
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition: IRBuilder.h:1573
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:552
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:513
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:82
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:312
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:510
Class to represent integer types.
Definition: DerivedTypes.h:42
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:319
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:442
bool movementPreservesLCSSAForm(Instruction *Inst, Instruction *NewLoc)
Checks if moving a specific instruction can break LCSSA in any loop.
Definition: LoopInfo.h:468
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
bool isComplete() const
If the PHI node is complete which means all of its parent's predecessors have incoming value in this ...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1885
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const SCEV * getOperand() const
This class represents an assumption that the expression LHS Pred RHS evaluates to true,...
const SCEV * getRHS() const
Returns the right hand side of the predicate.
ICmpInst::Predicate getPredicate() const
const SCEV * getLHS() const
Returns the left hand side of the predicate.
This class represents a constant integer value.
LLVM_ABI Value * generateOverflowCheck(const SCEVAddRecExpr *AR, Instruction *Loc, bool Signed)
Generates code that evaluates if the AR expression will overflow.
LLVM_ABI bool hasRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L)
Determine whether there is an existing expansion of S that can be reused.
SmallVector< Instruction *, 32 > getAllInsertedInstructions() const
Return a vector containing all instructions inserted during expansion.
LLVM_ABI bool isSafeToExpand(const SCEV *S) const
Return true if the given expression is safe to expand in the sense that all materialized values are s...
LLVM_ABI bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint) const
Return true if the given expression is safe to expand in the sense that all materialized values are d...
LLVM_ABI unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetTransformInfo *TTI=nullptr)
replace congruent phis with their most canonical representative.
LLVM_ABI Value * expandUnionPredicate(const SCEVUnionPredicate *Pred, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
LLVM_ABI bool hoistIVInc(Instruction *IncV, Instruction *InsertPos, bool RecomputePoisonFlags=false)
Utility for hoisting IncV (with all subexpressions requried for its computation) before InsertPos.
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
bool isInsertedInstruction(Instruction *I) const
Return true if the specified instruction was inserted by the code rewriter.
LLVM_ABI Value * expandCodeForPredicate(const SCEVPredicate *Pred, Instruction *Loc)
Generates a code sequence that evaluates this predicate.
static LLVM_ABI bool canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi, PHINode *WidePhi, Instruction *OrigInc, Instruction *WideInc)
Return true if both increments directly increment the corresponding IV PHI nodes and have the same op...
LLVM_ABI Value * expandComparePredicate(const SCEVComparePredicate *Pred, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
LLVM_ABI Value * expandWrapPredicate(const SCEVWrapPredicate *P, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
LLVM_ABI Instruction * getIVIncOperand(Instruction *IncV, Instruction *InsertPos, bool allowScale)
Return the induction variable increment's IV operand.
LLVM_ABI BasicBlock::iterator findInsertPointAfter(Instruction *I, Instruction *MustDominate) const
Returns a suitable insert point after I, that dominates MustDominate.
void setInsertPoint(Instruction *IP)
Set the current insertion point.
This node represents multiplication of some number of SCEVs.
This node is a base class providing common functionality for n'ary operators.
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
const SCEV * getOperand(unsigned i) const
ArrayRef< const SCEV * > operands() const
This class represents an assumption made using SCEV expressions which can be checked at run-time.
SCEVPredicateKind getKind() const
This class represents a cast from a pointer to a pointer-sized integer value.
This class represents a signed maximum selection.
This class represents a signed minimum selection.
This class represents a sequential/in-order unsigned minimum selection.
This class represents a sign extension of a small integer value to a larger integer value.
This class represents a truncation of an integer value to a smaller integer value.
This class represents a binary unsigned division operation.
const SCEV * getLHS() const
const SCEV * getRHS() const
This class represents an unsigned maximum selection.
This class represents an unsigned minimum selection.
This class represents a composition of other SCEV predicates, and is the class that most clients will...
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents the value of vscale, as used when defining the length of a scalable vector or r...
This class represents an assumption made on an AddRec expression.
const SCEVAddRecExpr * getExpr() const
Implementation of the SCEVPredicate interface.
IncrementWrapFlags getFlags() const
Returns the set assumed no overflow flags.
This class represents a zero extension of a small integer value to a larger integer value.
This class represents an analyzed expression in the program.
LLVM_ABI ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
LLVM_ABI bool isOne() const
Return true if the expression is a constant one.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI bool isNonConstantNegative() const
Return true if the specified scev is negated, but not a constant.
SCEVTypes getSCEVType() const
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
NoWrapFlags
NoWrapFlags are bitfield indices into SubclassData.
The main scalar evolution driver.
LLVM_ABI bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
LLVM_ABI const SCEV * removePointerBase(const SCEV *S)
Compute an expression equivalent to S - getPointerBase(S).
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getTruncateOrNoop(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI const SCEV * getPtrToIntExpr(const SCEV *Op, Type *Ty)
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI bool containsAddRecurrence(const SCEV *S)
Return true if the SCEV is a scAddRecExpr or it contains scAddRecExpr.
LLVM_ABI const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
static SCEV::NoWrapFlags clearFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OffFlags)
LLVM_ABI void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
LLVM_ABI const SCEV * getNoopOrAnyExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
LLVM_ABI std::optional< SCEV::NoWrapFlags > getStrengthenedNoWrapFlagsFromBinOp(const OverflowingBinaryOperator *OBO)
Parse NSW/NUW flags from add/sub/mul IR binary operation Op into SCEV no-wrap flags,...
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI bool dominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV dominate the specified basic block.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getUnknown(Value *V)
static SCEV::NoWrapFlags maskFlags(SCEV::NoWrapFlags Flags, int Mask)
Convenient NoWrapFlags manipulation that hides enum casts and is visible in the ScalarEvolution name ...
LLVM_ABI bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
LLVM_ABI bool canReuseInstruction(const SCEV *S, Instruction *I, SmallVectorImpl< Instruction * > &DropPoisonGeneratingInsts)
Check whether it is poison-safe to represent the expression S using the instruction I.
LLVM_ABI const SCEV * getPredicatedSymbolicMaxBackedgeTakenCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > &Predicates)
Similar to getSymbolicMaxBackedgeTakenCount, except it will add a set of SCEV predicates to Predicate...
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
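A sketch of building a composite expression from IR values; twiceTheSum is a hypothetical helper, and X and Y are assumed to be integer Values of the same type.

  static const SCEV *twiceTheSum(ScalarEvolution &SE, Value *X, Value *Y) {
    // getSCEV wraps values that SCEV cannot analyze further as SCEVUnknown.
    SmallVector<const SCEV *, 2> Ops = {SE.getSCEV(X), SE.getSCEV(Y)};
    const SCEV *Sum = SE.getAddExpr(Ops);      // canonicalized; may fold
    return SE.getMulExpr(Sum, SE.getConstant(Sum->getType(), 2));
  }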
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
bool empty() const
Definition: SmallVector.h:82
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
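A short sketch of the idiomatic pairing of these containers: SmallPtrSet for cheap deduplication, SmallVector to preserve discovery order. Worklist is an assumed range of Instruction pointers.

  SmallPtrSet<Instruction *, 8> Seen;
  SmallVector<Instruction *, 8> Order;
  for (Instruction *I : Worklist)
    if (Seen.insert(I).second)   // .second is true only on first insertion
      Order.push_back(I);
  // Seen.count(I) now returns 1 for every instruction in Order.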
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
LLVM_ABI InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
LLVM_ABI InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
LLVM_ABI bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
LLVM_ABI InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
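A sketch of querying the cost model; queryCosts is a hypothetical helper, and the opcodes, types and cost kind are illustrative only.

  static void queryCosts(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
    Type *I64 = Type::getInt64Ty(Ctx);
    Type *I32 = Type::getInt32Ty(Ctx);
    bool FreeTrunc = TTI.isTruncateFree(I64, I32);
    InstructionCost AddCost = TTI.getArithmeticInstrCost(
        Instruction::Add, I64, TargetTransformInfo::TCK_CodeSize);
    InstructionCost TruncCost = TTI.getCastInstrCost(
        Instruction::Trunc, I32, I64, TargetTransformInfo::CastContextHint::None,
        TargetTransformInfo::TCK_CodeSize);
    (void)FreeTrunc; (void)AddCost; (void)TruncCost;
  }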
@ None
The cast is not used with a load/store of any kind.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:273
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:267
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
LLVM_ABI unsigned getIntegerBitWidth() const
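A sketch of how these type queries combine when deciding the bit width to use for arithmetic on a value; arithmeticBitWidth is a hypothetical helper and DL is an assumed DataLayout.

  static unsigned arithmeticBitWidth(const Value *V, const DataLayout &DL) {
    Type *Ty = V->getType();
    if (Ty->isPointerTy())
      return DL.getPointerTypeSizeInBits(Ty);   // pointer width comes from the DataLayout
    if (Ty->isIntegerTy() || Ty->isVectorTy())
      return Ty->getScalarSizeInBits();         // element width for vectors
    return Ty->getPrimitiveSizeInBits().getFixedValue();
  }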
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
op_range operands()
Definition: User.h:292
Value * getOperand(unsigned i) const
Definition: User.h:232
unsigned getNumOperands() const
Definition: User.h:254
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:546
bool use_empty() const
Definition: Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1098
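A sketch of the standard replace-then-erase pattern, assuming OldI is an Instruction and NewV a Value of the same type.

  if (OldI->getType() == NewV->getType()) {
    OldI->replaceAllUsesWith(NewV);
    if (OldI->use_empty())
      OldI->eraseFromParent();
  }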
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:203
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:359
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
Definition: PatternMatch.h:619
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:862
specificval_ty m_Specific(const Value *V)
Match only the specific value given, rather than binding an arbitrary one.
Definition: PatternMatch.h:962
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition: PatternMatch.h:189
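A sketch of composing these matchers to recognise a conditional branch on an integer compare; Term is assumed to be a block terminator.

  using namespace llvm::PatternMatch;
  CmpPredicate Pred;
  Value *X, *Y;
  BasicBlock *TrueBB, *FalseBB;
  if (match(Term, m_Br(m_ICmp(Pred, m_Value(X), m_Value(Y)), TrueBB, FalseBB))) {
    // Pred, X, Y and both successor blocks are now bound.
  }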
cst_pred_ty< is_all_ones > m_scev_AllOnes()
Match an integer with all bits set.
class_match< const SCEVConstant > m_SCEVConstant()
SCEVBinaryExpr_match< SCEVMulExpr, Op0_t, Op1_t > m_scev_Mul(const Op0_t &Op0, const Op1_t &Op1)
bind_ty< const SCEVAddExpr > m_scev_Add(const SCEVAddExpr *&V)
SCEVUnaryExpr_match< SCEVPtrToIntExpr, Op0_t > m_scev_PtrToInt(const Op0_t &Op0)
class_match< const SCEV > m_SCEV()
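A sketch, using only the matchers listed above, of detecting the canonical negated form (-1 * X); it assumes SCEV's usual canonicalization of placing constant operands first.

  using namespace llvm::SCEVPatternMatch;
  if (match(S, m_scev_Mul(m_scev_AllOnes(), m_SCEV()))) {
    const SCEV *Negated = cast<SCEVMulExpr>(S)->getOperand(1); // S == -1 * Negated
    (void)Negated;
  }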
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void visitAll(const SCEV *Root, SV &Visitor)
Use SCEVTraversal to visit all nodes in the given expression tree.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
@ Offset
Definition: DWP.cpp:477
void stable_sort(R &&Range)
Definition: STLExtras.h:2077
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2491
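A self-contained sketch of the range helpers referenced here (make_scope_exit, all_of, enumerate, drop_begin, stable_sort), shown on a plain SmallVector.

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/ScopeExit.h"
  #include "llvm/ADT/SmallVector.h"
  using namespace llvm;

  void rangeHelperDemo() {
    SmallVector<int, 8> Vals = {3, 1, 4, 1, 5};
    auto Cleanup = make_scope_exit([] { /* runs when the scope unwinds */ });
    bool AllPositive = all_of(Vals, [](int V) { return V > 0; });
    for (auto [Idx, V] : enumerate(Vals)) {      // index/value pairs, no counter
      (void)Idx; (void)V;
    }
    for (int V : drop_begin(Vals))               // skips Vals[0]
      (void)V;
    stable_sort(Vals);                           // keeps order of equal elements
    (void)AllPositive;
  }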
auto pred_end(const MachineBasicBlock *BB)
constexpr from_range_t from_range
auto pred_size(const MachineBasicBlock *BB)
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
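A sketch of cleaning up an instruction that may have become redundant; DL, TLI, DT and AC are assumed to be the usual analyses held by the caller.

  SimplifyQuery Q(DL, &TLI, &DT, &AC);
  if (Value *V = simplifyInstruction(I, Q)) {
    I->replaceAllUsesWith(V);
    I->eraseFromParent();
  }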
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI cl::opt< unsigned > SCEVCheapExpansionBudget
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
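A sketch of folding a binary operation on constants; Int32Ty and DL are assumed to be in scope, and a null result means the fold did not succeed.

  Constant *LHS = ConstantInt::get(Int32Ty, 2);
  Constant *RHS = ConstantInt::get(Int32Ty, 3);
  if (Constant *Folded =
          ConstantFoldBinaryOpOperands(Instruction::Add, LHS, RHS, DL)) {
    // Folded is the i32 constant 5.
  }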
LLVM_ABI const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of the innermost containing loop.
Definition: LCSSA.cpp:308
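A sketch of the typical call site: after creating a new loop-varying instruction, rebuild LCSSA form just for it. NewInst, DT, LI and SE are assumed to be provided by the surrounding pass.

  SmallVector<Instruction *, 4> Worklist = {NewInst};
  formLCSSAForInstructions(Worklist, DT, LI, &SE);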
auto pred_begin(const MachineBasicBlock *BB)
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition: iterator.h:363
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
InstructionCost Cost
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
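A sketch of a common predicate: does the expression mention any opaque value (SCEVUnknown)?

  bool HasUnknown =
      SCEVExprContains(S, [](const SCEV *N) { return isa<SCEVUnknown>(N); });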
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
LLVM_ABI void apply(Instruction *I)
LLVM_ABI PoisonFlags(const Instruction *I)
struct for holding enough information to help calculate the cost of the given SCEV when expanded into...