1//===- LoopVectorizationLegality.cpp --------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides loop vectorization legality analysis. Original code
10// resided in LoopVectorize.cpp for a long time.
11//
12// At this point, it is implemented as a utility class, not as an analysis
13// pass. It should be easy to create an analysis pass around it if there
14// is a need (but D45420 needs to happen first).
15//
16
18#include "llvm/Analysis/Loads.h"
30
31using namespace llvm;
32using namespace PatternMatch;
33
34#define LV_NAME "loop-vectorize"
35#define DEBUG_TYPE LV_NAME
36
37static cl::opt<bool>
38 EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
39 cl::desc("Enable if-conversion during vectorization."));
40
41static cl::opt<bool>
42AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
43 cl::desc("Enable recognition of non-constant strided "
44 "pointer induction variables."));
45
46static cl::opt<bool>
47 HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
48 cl::desc("Allow enabling loop hints to reorder "
49 "FP operations during vectorization."));
50
51// TODO: Move size-based thresholds out of legality checking, make cost based
52// decisions instead of hard thresholds.
54 "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
55 cl::desc("The maximum number of SCEV checks allowed."));
56
58 "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
59 cl::desc("The maximum number of SCEV checks allowed with a "
60 "vectorize(enable) pragma"));
61
64 "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
66 cl::desc("Control whether the compiler can use scalable vectors to "
67 "vectorize a loop"),
70 "Scalable vectorization is disabled."),
73 "Scalable vectorization is available and favored when the "
74 "cost is inconclusive."),
77 "Scalable vectorization is available and favored when the "
78 "cost is inconclusive.")));
79
81 "enable-histogram-loop-vectorization", cl::init(false), cl::Hidden,
82 cl::desc("Enables autovectorization of some loops containing histograms"));
83
84/// Maximum vectorization interleave count.
85static const unsigned MaxInterleaveFactor = 16;
86
87namespace llvm {
88
89bool LoopVectorizeHints::Hint::validate(unsigned Val) {
90 switch (Kind) {
91 case HK_WIDTH:
92 return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
93 case HK_INTERLEAVE:
94 return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
95 case HK_FORCE:
96 return (Val <= 1);
97 case HK_ISVECTORIZED:
98 case HK_PREDICATE:
99 case HK_SCALABLE:
100 return (Val == 0 || Val == 1);
101 }
102 return false;
103}
104
105LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
106 bool InterleaveOnlyWhenForced,
107 OptimizationRemarkEmitter &ORE,
108 const TargetTransformInfo *TTI)
109 : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
110 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
111 Force("vectorize.enable", FK_Undefined, HK_FORCE),
112 IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
113 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
114 Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
115 TheLoop(L), ORE(ORE) {
116 // Populate values with existing loop metadata.
117 getHintsFromMetadata();
118
119 // force-vector-interleave overrides DisableInterleaving.
120 if (VectorizerParams::isInterleaveForced())
121 InterleaveOnlyWhenForced = false;
122
123 // If the metadata doesn't explicitly specify whether to enable scalable
124 // vectorization, then decide based on the following criteria (increasing
125 // level of priority):
126 // - Target default
127 // - Metadata width
128 // - Force option (always overrides)
129 if (Scalable.Value == SK_Unspecified) {
130 if (TTI)
131 Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
132 : SK_FixedWidthOnly;
133
134 if (Width.Value)
135 // If the width is set, but the metadata says nothing about the scalable
136 // property, then assume it concerns only a fixed-width UserVF.
137 // If width is not set, the flag takes precedence.
138 Scalable.Value = SK_FixedWidthOnly;
139 }
140
141 // If the flag is set to force any use of scalable vectors, override the loop
142 // hints.
143 if (ForceScalableVectorization.getValue() !=
144 LoopVectorizeHints::ScalableForceKind::SK_Unspecified)
145 Scalable.Value = ForceScalableVectorization.getValue();
146
147 // Scalable vectorization is disabled if no preference is specified.
148 if (Scalable.Value == SK_Unspecified)
149 Scalable.Value = SK_FixedWidthOnly;
150
151 if (IsVectorized.Value != 1)
152 // If the vectorization width and interleaving count are both 1 then
153 // consider the loop to have been already vectorized because there's
154 // nothing more that we can do.
155 IsVectorized.Value =
156 getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
157 LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
158 << "LV: Interleaving disabled by the pass manager\n");
159}
160
161void LoopVectorizeHints::setAlreadyVectorized() {
162 LLVMContext &Context = TheLoop->getHeader()->getContext();
163
164 MDNode *IsVectorizedMD = MDNode::get(
165 Context,
166 {MDString::get(Context, "llvm.loop.isvectorized"),
167 ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
168 MDNode *LoopID = TheLoop->getLoopID();
169 MDNode *NewLoopID =
170 makePostTransformationMetadata(Context, LoopID,
171 {Twine(Prefix(), "vectorize.").str(),
172 Twine(Prefix(), "interleave.").str()},
173 {IsVectorizedMD});
174 TheLoop->setLoopID(NewLoopID);
175
176 // Update internal cache.
177 IsVectorized.Value = 1;
178}
179
180bool LoopVectorizeHints::allowVectorization(
181 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
182 if (getForce() == LoopVectorizeHints::FK_Disabled) {
183 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
184 emitRemarkWithHints();
185 return false;
186 }
187
188 if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
189 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
191 return false;
192 }
193
194 if (getIsVectorized() == 1) {
195 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
196 // FIXME: Add interleave.disable metadata. This will allow
197 // vectorize.disable to be used without disabling the pass and errors
198 // to differentiate between disabled vectorization and a width of 1.
199 ORE.emit([&]() {
200 return OptimizationRemarkMissed(vectorizeAnalysisPassName(),
201 "AllDisabled", L->getStartLoc(),
202 L->getHeader())
203 << "loop not vectorized: vectorization and interleaving are "
204 "explicitly disabled, or the loop has already been "
205 "vectorized";
206 });
207 return false;
208 }
209
210 return true;
211}
212
213void LoopVectorizeHints::emitRemarkWithHints() const {
214 using namespace ore;
215
216 ORE.emit([&]() {
217 if (Force.Value == LoopVectorizeHints::FK_Disabled)
218 return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
219 TheLoop->getStartLoc(),
220 TheLoop->getHeader())
221 << "loop not vectorized: vectorization is explicitly disabled";
222
223 OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(),
224 TheLoop->getHeader());
225 R << "loop not vectorized";
226 if (Force.Value == LoopVectorizeHints::FK_Enabled) {
227 R << " (Force=" << NV("Force", true);
228 if (Width.Value != 0)
229 R << ", Vector Width=" << NV("VectorWidth", getWidth());
230 if (getInterleave() != 0)
231 R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
232 R << ")";
233 }
234 return R;
235 });
236}
237
238const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
239 if (getWidth() == ElementCount::getFixed(1))
240 return LV_NAME;
241 if (getForce() == LoopVectorizeHints::FK_Disabled)
242 return LV_NAME;
243 if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
244 return LV_NAME;
245 return OptimizationRemarkAnalysis::AlwaysPrint;
246}
247
248bool LoopVectorizeHints::allowReordering() const {
249 // Allow the vectorizer to change the order of operations if enabling
250 // loop hints are provided
251 ElementCount EC = getWidth();
252 return HintsAllowReordering &&
253 (getForce() == LoopVectorizeHints::FK_Enabled ||
254 EC.getKnownMinValue() > 1);
255}
256
257void LoopVectorizeHints::getHintsFromMetadata() {
258 MDNode *LoopID = TheLoop->getLoopID();
259 if (!LoopID)
260 return;
261
262 // First operand should refer to the loop id itself.
263 assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
264 assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
265
266 for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
267 const MDString *S = nullptr;
268 SmallVector<Metadata *, 4> Args;
269
270 // The expected hint is either a MDString or a MDNode with the first
271 // operand a MDString.
272 if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {
273 if (!MD || MD->getNumOperands() == 0)
274 continue;
275 S = dyn_cast<MDString>(MD->getOperand(0));
276 for (unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)
277 Args.push_back(MD->getOperand(Idx));
278 } else {
279 S = dyn_cast<MDString>(MDO);
280 assert(Args.size() == 0 && "too many arguments for MDString");
281 }
282
283 if (!S)
284 continue;
285
286 // Check if the hint starts with the loop metadata prefix.
287 StringRef Name = S->getString();
288 if (Args.size() == 1)
289 setHint(Name, Args[0]);
290 }
291}
292
293void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
294 if (!Name.starts_with(Prefix()))
295 return;
296 Name = Name.substr(Prefix().size(), StringRef::npos);
297
298 const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
299 if (!C)
300 return;
301 unsigned Val = C->getZExtValue();
302
303 Hint *Hints[] = {&Width, &Interleave, &Force,
304 &IsVectorized, &Predicate, &Scalable};
305 for (auto *H : Hints) {
306 if (Name == H->Name) {
307 if (H->validate(Val))
308 H->Value = Val;
309 else
310 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
311 break;
312 }
313 }
314}
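
// As an illustration (not taken from a particular test case; the metadata node
// numbers are arbitrary), loop metadata of the following shape is recognized
// by getHintsFromMetadata()/setHint() and populates the Width, Interleave and
// Force hints:
//
//   br i1 %exitcond, label %exit, label %loop, !llvm.loop !0
//   !0 = distinct !{!0, !1, !2, !3}
//   !1 = !{!"llvm.loop.vectorize.width", i32 4}
//   !2 = !{!"llvm.loop.interleave.count", i32 2}
//   !3 = !{!"llvm.loop.vectorize.enable", i1 true}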
315
316// Return true if the inner loop \p Lp is uniform with regard to the outer loop
317// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
318// executing the inner loop will execute the same iterations). This check is
319// very constrained for now but it will be relaxed in the future. \p Lp is
320// considered uniform if it meets all the following conditions:
321// 1) it has a canonical IV (starting from 0 and with stride 1),
322// 2) its latch terminator is a conditional branch and,
323// 3) its latch condition is a compare instruction whose operands are the
324// canonical IV and an OuterLp invariant.
325// This check doesn't take into account the uniformity of other conditions not
326// related to the loop latch because they don't affect the loop uniformity.
327//
328// NOTE: We decided to keep all these checks and their associated documentation
329// together so that we can easily have a picture of the current supported loop
330// nests. However, some of the current checks don't depend on \p OuterLp and
331// would be redundantly executed for each \p Lp if we invoked this function for
332// different candidate outer loops. This is not the case for now because we
333// don't currently have the infrastructure to evaluate multiple candidate outer
334// loops and \p OuterLp will be a fixed parameter while we only support explicit
335// outer loop vectorization. It's also very likely that these checks go away
336// before introducing the aforementioned infrastructure. However, if this is not
337// the case, we should move the \p OuterLp independent checks to a separate
338// function that is only executed once for each \p Lp.
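//
// As an illustrative sketch, the inner loop below is uniform with regard to
// the outer loop: it has a canonical IV 'j', its latch terminator is a
// conditional branch, and the latch compare uses 'j' and 'M', which is
// invariant in the outer loop:
//
//   for (int i = 0; i < N; ++i)   // OuterLp
//     for (int j = 0; j < M; ++j) // Lp, uniform w.r.t. OuterLp
//       A[i][j] = B[i][j];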
339static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
340 assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
341
342 // If Lp is the outer loop, it's uniform by definition.
343 if (Lp == OuterLp)
344 return true;
345 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
346
347 // 1.
348 PHINode *IV = Lp->getCanonicalInductionVariable();
349 if (!IV) {
350 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
351 return false;
352 }
353
354 // 2.
355 BasicBlock *Latch = Lp->getLoopLatch();
356 auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
357 if (!LatchBr || LatchBr->isUnconditional()) {
358 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
359 return false;
360 }
361
362 // 3.
363 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
364 if (!LatchCmp) {
365 LLVM_DEBUG(
366 dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
367 return false;
368 }
369
370 Value *CondOp0 = LatchCmp->getOperand(0);
371 Value *CondOp1 = LatchCmp->getOperand(1);
372 Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
373 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
374 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
375 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
376 return false;
377 }
378
379 return true;
380}
381
382// Return true if \p Lp and all its nested loops are uniform with regard to \p
383// OuterLp.
384static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
385 if (!isUniformLoop(Lp, OuterLp))
386 return false;
387
388 // Check if nested loops are uniform.
389 for (Loop *SubLp : *Lp)
390 if (!isUniformLoopNest(SubLp, OuterLp))
391 return false;
392
393 return true;
394}
395
396static IntegerType *getInductionIntegerTy(const DataLayout &DL, Type *Ty) {
397 assert(Ty->isIntOrPtrTy() && "Expected integer or pointer type");
398
399 if (Ty->isPointerTy())
400 return DL.getIntPtrType(Ty->getContext(), Ty->getPointerAddressSpace());
401
402 // It is possible that char's or short's overflow when we ask for the loop's
403 // trip count, work around this by changing the type size.
404 if (Ty->getScalarSizeInBits() < 32)
405 return Type::getInt32Ty(Ty->getContext());
406
407 return cast<IntegerType>(Ty);
408}
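
// For example, an i8 or i16 induction is widened here to i32 so that
// trip-count computations based on it cannot wrap, while i32 and wider
// integer inductions (and pointer inductions, via the intptr type) keep
// their natural width.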
409
410static Type *getWiderInductionTy(const DataLayout &DL, Type *Ty0,
411 Type *Ty1) {
412 Type *TyA = getInductionIntegerTy(DL, Ty0);
413 Type *TyB = getInductionIntegerTy(DL, Ty1);
414 return TyA->getScalarSizeInBits() > TyB->getScalarSizeInBits() ? TyA : TyB;
415}
416
417/// Check that the instruction has outside loop users and is not an
418/// identified reduction variable.
419static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
420 SmallPtrSetImpl<Value *> &AllowedExit) {
421 // Reductions, Inductions and non-header phis are allowed to have exit users. All
422 // other instructions must not have external users.
423 if (!AllowedExit.count(Inst))
424 // Check that all of the users of the loop are inside the BB.
425 for (User *U : Inst->users()) {
426 Instruction *UI = cast<Instruction>(U);
427 // This user may be a reduction exit value.
428 if (!TheLoop->contains(UI)) {
429 LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
430 return true;
431 }
432 }
433 return false;
434}
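
// For example (an illustrative IR sketch), %sum below has a user in the exit
// block; unless %sum was recorded in AllowedExit (e.g. as a reduction exit
// value), hasOutsideLoopUser() returns true and the loop is rejected:
//
//   loop:
//     %sum = add i32 %sum.phi, %x
//     ...
//   exit:
//     %lcssa = phi i32 [ %sum, %loop ]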
435
436/// Returns true if A and B have the same pointer operands or the same SCEV addresses
437static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
438 StoreInst *B) {
439 // Compare store
440 if (A == B)
441 return true;
442
443 // Otherwise Compare pointers
444 Value *APtr = A->getPointerOperand();
445 Value *BPtr = B->getPointerOperand();
446 if (APtr == BPtr)
447 return true;
448
449 // Otherwise compare address SCEVs
450 return SE->getSCEV(APtr) == SE->getSCEV(BPtr);
451}
452
453int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
454 Value *Ptr) const {
455 // FIXME: Currently, the set of symbolic strides is sometimes queried before
456 // it's collected. This happens from canVectorizeWithIfConvert, when the
457 // pointer is checked to reference consecutive elements suitable for a
458 // masked access.
459 const auto &Strides =
460 LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();
461
462 bool CanAddPredicate = !llvm::shouldOptimizeForSize(
463 TheLoop->getHeader(), PSI, BFI, PGSOQueryType::IRPass);
464 int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
465 CanAddPredicate, false).value_or(0);
466 if (Stride == 1 || Stride == -1)
467 return Stride;
468 return 0;
469}
470
471bool LoopVectorizationLegality::isInvariant(Value *V) const {
472 return LAI->isInvariant(V);
473}
474
475namespace {
476/// A rewriter to build the SCEVs for each of the VF lanes in the expected
477/// vectorized loop, which can then be compared to detect their uniformity. This
478/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
479/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
480/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
481/// uniformity.
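///
/// For example (an illustrative sketch), with StepMultiplier == VF == 4 and
/// Offset == 1, the AddRec {%start,+,%step}<TheLoop> is rewritten to
/// {(%start + %step),+,(4 * %step)}<TheLoop>, i.e. the SCEV the value would
/// have in lane 1 of the vectorized loop.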
482class SCEVAddRecForUniformityRewriter
483 : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
484 /// Multiplier to be applied to the step of AddRecs in TheLoop.
485 unsigned StepMultiplier;
486
487 /// Offset to be added to the AddRecs in TheLoop.
488 unsigned Offset;
489
490 /// Loop for which to rewrite AddRecs.
491 Loop *TheLoop;
492
493 /// Are any sub-expressions not analyzable w.r.t. uniformity?
494 bool CannotAnalyze = false;
495
496 bool canAnalyze() const { return !CannotAnalyze; }
497
498public:
499 SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
500 unsigned Offset, Loop *TheLoop)
501 : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
502 TheLoop(TheLoop) {}
503
504 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
505 assert(Expr->getLoop() == TheLoop &&
506 "addrec outside of TheLoop must be invariant and should have been "
507 "handled earlier");
508 // Build a new AddRec by multiplying the step by StepMultiplier and
509 // incrementing the start by Offset * step.
510 Type *Ty = Expr->getType();
511 const SCEV *Step = Expr->getStepRecurrence(SE);
512 if (!SE.isLoopInvariant(Step, TheLoop)) {
513 CannotAnalyze = true;
514 return Expr;
515 }
516 const SCEV *NewStep =
517 SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
518 const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
519 const SCEV *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
520 return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
521 }
522
523 const SCEV *visit(const SCEV *S) {
524 if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
525 return S;
526 return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
527 }
528
529 const SCEV *visitUnknown(const SCEVUnknown *S) {
530 if (SE.isLoopInvariant(S, TheLoop))
531 return S;
532 // The value could vary across iterations.
533 CannotAnalyze = true;
534 return S;
535 }
536
537 const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
538 // Could not analyze the expression.
539 CannotAnalyze = true;
540 return S;
541 }
542
543 static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
544 unsigned StepMultiplier, unsigned Offset,
545 Loop *TheLoop) {
546 /// Bail out if the expression does not contain an UDiv expression.
547 /// Uniform values which are not loop invariant require operations to strip
548 /// out the lowest bits. For now just look for UDivs and use it to avoid
549 /// re-writing UDIV-free expressions for other lanes to limit compile time.
550 if (!SCEVExprContains(S,
551 [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
552 return SE.getCouldNotCompute();
553
554 SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
555 TheLoop);
556 const SCEV *Result = Rewriter.visit(S);
557
558 if (Rewriter.canAnalyze())
559 return Result;
560 return SE.getCouldNotCompute();
561 }
562};
563
564} // namespace
565
566bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
567 if (isInvariant(V))
568 return true;
569 if (VF.isScalable())
570 return false;
571 if (VF.isScalar())
572 return true;
573
574 // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
575 // never considered uniform.
576 auto *SE = PSE.getSE();
577 if (!SE->isSCEVable(V->getType()))
578 return false;
579 const SCEV *S = SE->getSCEV(V);
580
581 // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
582 // lane 0 matches the expressions for all other lanes.
583 unsigned FixedVF = VF.getKnownMinValue();
584 const SCEV *FirstLaneExpr =
585 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
586 if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
587 return false;
588
589 // Make sure the expressions for lanes FixedVF-1..1 match the expression for
590 // lane 0. We check lanes in reverse order for compile-time, as frequently
591 // checking the last lane is sufficient to rule out uniformity.
592 return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
593 const SCEV *IthLaneExpr =
594 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
595 return FirstLaneExpr == IthLaneExpr;
596 });
597}
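
// For example (an illustrative sketch), for a canonical IV %i and VF = 4, a
// value such as (%i udiv 4) produces the per-lane expressions
// ({0,+,4} /u 4), ({1,+,4} /u 4), ({2,+,4} /u 4) and ({3,+,4} /u 4); it is
// treated as uniform only if SCEV folds all of them to the same expression
// as lane 0.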
598
599bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
600 ElementCount VF) const {
601 Value *Ptr = getLoadStorePointerOperand(&I);
602 if (!Ptr)
603 return false;
604 // Note: There's nothing inherent which prevents predicated loads and
605 // stores from being uniform. The current lowering simply doesn't handle
606 // it; in particular, the cost model distinguishes scatter/gather from
607 // scalar w/predication, and we currently rely on the scalar path.
608 return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
609}
610
611bool LoopVectorizationLegality::canVectorizeOuterLoop() {
612 assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
613 // Store the result and return it at the end instead of exiting early, in case
614 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
615 bool Result = true;
616 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
617
618 for (BasicBlock *BB : TheLoop->blocks()) {
619 // Check whether the BB terminator is a BranchInst. Any other terminator is
620 // not supported yet.
621 auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
622 if (!Br) {
623 reportVectorizationFailure("Unsupported basic block terminator",
624 "loop control flow is not understood by vectorizer",
625 "CFGNotUnderstood", ORE, TheLoop);
626 if (DoExtraAnalysis)
627 Result = false;
628 else
629 return false;
630 }
631
632 // Check whether the BranchInst is a supported one. Only unconditional
633 // branches, conditional branches with an outer loop invariant condition or
634 // backedges are supported.
635 // FIXME: We skip these checks when VPlan predication is enabled as we
636 // want to allow divergent branches. This whole check will be removed
637 // once VPlan predication is on by default.
638 if (Br && Br->isConditional() &&
639 !TheLoop->isLoopInvariant(Br->getCondition()) &&
640 !LI->isLoopHeader(Br->getSuccessor(0)) &&
641 !LI->isLoopHeader(Br->getSuccessor(1))) {
642 reportVectorizationFailure("Unsupported conditional branch",
643 "loop control flow is not understood by vectorizer",
644 "CFGNotUnderstood", ORE, TheLoop);
645 if (DoExtraAnalysis)
646 Result = false;
647 else
648 return false;
649 }
650 }
651
652 // Check whether inner loops are uniform. At this point, we only support
653 // simple outer loops scenarios with uniform nested loops.
654 if (!isUniformLoopNest(TheLoop /*loop nest*/,
655 TheLoop /*context outer loop*/)) {
656 reportVectorizationFailure("Outer loop contains divergent loops",
657 "loop control flow is not understood by vectorizer",
658 "CFGNotUnderstood", ORE, TheLoop);
659 if (DoExtraAnalysis)
660 Result = false;
661 else
662 return false;
663 }
664
665 // Check whether we are able to set up outer loop induction.
666 if (!setupOuterLoopInductions()) {
667 reportVectorizationFailure("Unsupported outer loop Phi(s)",
668 "UnsupportedPhi", ORE, TheLoop);
669 if (DoExtraAnalysis)
670 Result = false;
671 else
672 return false;
673 }
674
675 return Result;
676}
677
678void LoopVectorizationLegality::addInductionPhi(
679 PHINode *Phi, const InductionDescriptor &ID,
680 SmallPtrSetImpl<Value *> &AllowedExit) {
681 Inductions[Phi] = ID;
682
683 // In case this induction also comes with casts that we know we can ignore
684 // in the vectorized loop body, record them here. All casts could be recorded
685 // here for ignoring, but it suffices to record only the first (as it is the
686 // only one that may be used outside the cast sequence).
687 const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
688 if (!Casts.empty())
689 InductionCastsToIgnore.insert(*Casts.begin());
690
691 Type *PhiTy = Phi->getType();
692 const DataLayout &DL = Phi->getDataLayout();
693
694 assert((PhiTy->isIntOrPtrTy() || PhiTy->isFloatingPointTy()) &&
695 "Expected int, ptr, or FP induction phi type");
696
697 // Get the widest type.
698 if (PhiTy->isIntOrPtrTy()) {
699 if (!WidestIndTy)
700 WidestIndTy = getInductionIntegerTy(DL, PhiTy);
701 else
702 WidestIndTy = getWiderInductionTy(DL, PhiTy, WidestIndTy);
703 }
704
705 // Int inductions are special because we only allow one IV.
706 if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
707 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
708 isa<Constant>(ID.getStartValue()) &&
709 cast<Constant>(ID.getStartValue())->isNullValue()) {
710
711 // Use the phi node with the widest type as induction. Use the last
712 // one if there are multiple (no good reason for doing this other
713 // than it is expedient). We've checked that it begins at zero and
714 // steps by one, so this is a canonical induction variable.
715 if (!PrimaryInduction || PhiTy == WidestIndTy)
716 PrimaryInduction = Phi;
717 }
718
719 // Both the PHI node itself, and the "post-increment" value feeding
720 // back into the PHI node may have external users.
721 // We can allow those uses, except if the SCEVs we have for them rely
722 // on predicates that only hold within the loop, since allowing the exit
723 // currently means re-using this SCEV outside the loop (see PR33706 for more
724 // details).
725 if (PSE.getPredicate().isAlwaysTrue()) {
726 AllowedExit.insert(Phi);
727 AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
728 }
729
730 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
731}
732
733bool LoopVectorizationLegality::setupOuterLoopInductions() {
734 BasicBlock *Header = TheLoop->getHeader();
735
736 // Returns true if a given Phi is a supported induction.
737 auto IsSupportedPhi = [&](PHINode &Phi) -> bool {
738 InductionDescriptor ID;
739 if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
740 ID.getKind() == InductionDescriptor::IK_IntInduction) {
741 addInductionPhi(&Phi, ID, AllowedExit);
742 return true;
743 }
744 // Bail out for any Phi in the outer loop header that is not a supported
745 // induction.
747 dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n");
748 return false;
749 };
750
751 return llvm::all_of(Header->phis(), IsSupportedPhi);
752}
753
754/// Checks if a function is scalarizable according to the TLI, in
755/// the sense that it should be vectorized and then expanded in
756/// multiple scalar calls. This is represented in the
757/// TLI via mappings that do not specify a vector name, as in the
758/// following example:
759///
760/// const VecDesc VecIntrinsics[] = {
761/// {"llvm.phx.abs.i32", "", 4}
762/// };
763static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
764 const StringRef ScalarName = CI.getCalledFunction()->getName();
765 bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
766 // Check that all known VFs are not associated to a vector
767 // function, i.e. the vector name is empty.
768 if (Scalarize) {
769 ElementCount WidestFixedVF, WidestScalableVF;
770 TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
771 for (ElementCount VF = ElementCount::getFixed(2);
772 ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
773 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
774 for (ElementCount VF = ElementCount::getScalable(1);
775 ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
776 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
777 assert((WidestScalableVF.isZero() || !Scalarize) &&
778 "Caller may decide to scalarize a variant using a scalable VF");
779 }
780 return Scalarize;
781}
782
783/// Returns true if the call return type `Ty` can be widened by the loop
784/// vectorizer.
785static bool canWidenCallReturnType(Type *Ty) {
786 auto *StructTy = dyn_cast<StructType>(Ty);
787 // TODO: Remove the homogeneous types restriction. This is just an initial
788 // simplification. When we want to support things like the overflow intrinsics
789 // we will have to lift this restriction.
790 if (StructTy && !StructTy->containsHomogeneousTypes())
791 return false;
792 return canVectorizeTy(StructTy);
793}
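
// For example (an illustrative IR sketch; @foo is a placeholder function), a
// call returning a homogeneous literal struct whose only users are
// extractvalue instructions can be widened:
//
//   %r = call { float, float } @foo(float %x)
//   %s = extractvalue { float, float } %r, 0
//   %c = extractvalue { float, float } %r, 1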
794
795bool LoopVectorizationLegality::canVectorizeInstrs() {
796 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
797 bool Result = true;
798
799 // For each block in the loop.
800 for (BasicBlock *BB : TheLoop->blocks()) {
801 // Scan the instructions in the block and look for hazards.
802 for (Instruction &I : *BB) {
803 Result &= canVectorizeInstr(I);
804 if (!DoExtraAnalysis && !Result)
805 return false;
806 }
807 }
808
809 if (!PrimaryInduction) {
810 if (Inductions.empty()) {
811 reportVectorizationFailure(
812 "Did not find one integer induction var",
813 "loop induction variable could not be identified",
814 "NoInductionVariable", ORE, TheLoop);
815 return false;
816 }
817 if (!WidestIndTy) {
818 reportVectorizationFailure(
819 "Did not find one integer induction var",
820 "integer loop induction variable could not be identified",
821 "NoIntegerInductionVariable", ORE, TheLoop);
822 return false;
823 }
824 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
825 }
826
827 // Now we know the widest induction type, check if our found induction
828 // is the same size. If it's not, unset it here and InnerLoopVectorizer
829 // will create another.
830 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
831 PrimaryInduction = nullptr;
832
833 return Result;
834}
835
836bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
837 BasicBlock *BB = I.getParent();
838 BasicBlock *Header = TheLoop->getHeader();
839
840 if (auto *Phi = dyn_cast<PHINode>(&I)) {
841 Type *PhiTy = Phi->getType();
842 // Check that this PHI type is allowed.
843 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
844 !PhiTy->isPointerTy()) {
845 reportVectorizationFailure(
846 "Found a non-int non-pointer PHI",
847 "loop control flow is not understood by vectorizer",
848 "CFGNotUnderstood", ORE, TheLoop);
849 return false;
850 }
851
852 // If this PHINode is not in the header block, then we know that we
853 // can convert it to select during if-conversion. No need to check if
854 // the PHIs in this block are induction or reduction variables.
855 if (BB != Header) {
856 // Non-header phi nodes that have outside uses can be vectorized. Add
857 // them to the list of allowed exits.
858 // Unsafe cyclic dependencies with header phis are identified during
859 // legalization for reduction, induction and fixed order
860 // recurrences.
861 AllowedExit.insert(&I);
862 return true;
863 }
864
865 // We only allow if-converted PHIs with exactly two incoming values.
866 if (Phi->getNumIncomingValues() != 2) {
867 reportVectorizationFailure(
868 "Found an invalid PHI",
869 "loop control flow is not understood by vectorizer",
870 "CFGNotUnderstood", ORE, TheLoop, Phi);
871 return false;
872 }
873
874 RecurrenceDescriptor RedDes;
875 if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, DT,
876 PSE.getSE())) {
877 Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
878 AllowedExit.insert(RedDes.getLoopExitInstr());
879 Reductions[Phi] = RedDes;
880 return true;
881 }
882
883 // We prevent matching non-constant strided pointer IVS to preserve
884 // historical vectorizer behavior after a generalization of the
885 // IVDescriptor code. The intent is to remove this check, but we
886 // have to fix issues around code quality for such loops first.
887 auto IsDisallowedStridedPointerInduction =
888 [](const InductionDescriptor &ID) {
889 if (AllowStridedPointerIVs)
890 return false;
891 return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
892 ID.getConstIntStepValue() == nullptr;
893 };
894
895 // TODO: Instead of recording the AllowedExit, it would be good to
896 // record the complementary set: NotAllowedExit. These include (but may
897 // not be limited to):
898 // 1. Reduction phis as they represent the one-before-last value, which
899 // is not available when vectorized
900 // 2. Induction phis and increment when SCEV predicates cannot be used
901 // outside the loop - see addInductionPhi
902 // 3. Non-Phis with outside uses when SCEV predicates cannot be used
903 // outside the loop - see call to hasOutsideLoopUser in the non-phi
904 // handling below
905 // 4. FixedOrderRecurrence phis that can possibly be handled by
906 // extraction.
907 // By recording these, we can then reason about ways to vectorize each
908 // of these NotAllowedExit.
909 InductionDescriptor ID;
910 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
911 !IsDisallowedStridedPointerInduction(ID)) {
912 addInductionPhi(Phi, ID, AllowedExit);
913 Requirements->addExactFPMathInst(ID.getExactFPMathInst());
914 return true;
915 }
916
917 if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
918 AllowedExit.insert(Phi);
919 FixedOrderRecurrences.insert(Phi);
920 return true;
921 }
922
923 // As a last resort, coerce the PHI to an AddRec expression
924 // and re-try classifying it as an induction PHI.
925 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
926 !IsDisallowedStridedPointerInduction(ID)) {
927 addInductionPhi(Phi, ID, AllowedExit);
928 return true;
929 }
930
931 reportVectorizationFailure("Found an unidentified PHI",
932 "value that could not be identified as "
933 "reduction is used outside the loop",
934 "NonReductionValueUsedOutsideLoop", ORE, TheLoop,
935 Phi);
936 return false;
937 } // end of PHI handling
938
939 // We handle calls that:
940 // * Have a mapping to an IR intrinsic.
941 // * Have a vector version available.
942 auto *CI = dyn_cast<CallInst>(&I);
943
944 if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
945 !(CI->getCalledFunction() && TLI &&
946 (!VFDatabase::getMappings(*CI).empty() || isTLIScalarize(*TLI, *CI)))) {
947 // If the call is a recognized math library call, it is likely that
948 // we can vectorize it given loosened floating-point constraints.
949 LibFunc Func;
950 bool IsMathLibCall =
951 TLI && CI->getCalledFunction() && CI->getType()->isFloatingPointTy() &&
952 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
953 TLI->hasOptimizedCodeGen(Func);
954
955 if (IsMathLibCall) {
956 // TODO: Ideally, we should not use clang-specific language here,
957 // but it's hard to provide meaningful yet generic advice.
958 // Also, should this be guarded by allowExtraAnalysis() and/or be part
959 // of the returned info from isFunctionVectorizable()?
960 reportVectorizationFailure(
961 "Found a non-intrinsic callsite",
962 "library call cannot be vectorized. "
963 "Try compiling with -fno-math-errno, -ffast-math, "
964 "or similar flags",
965 "CantVectorizeLibcall", ORE, TheLoop, CI);
966 } else {
967 reportVectorizationFailure("Found a non-intrinsic callsite",
968 "call instruction cannot be vectorized",
969 "CantVectorizeLibcall", ORE, TheLoop, CI);
970 }
971 return false;
972 }
973
974 // Some intrinsics have scalar arguments; such arguments must be loop
975 // invariant (i.e. the same on every iteration) for the call to be vectorized.
976 if (CI) {
977 auto *SE = PSE.getSE();
978 Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
979 for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
980 if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx, TTI)) {
981 if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)), TheLoop)) {
982 reportVectorizationFailure(
983 "Found unvectorizable intrinsic",
984 "intrinsic instruction cannot be vectorized",
985 "CantVectorizeIntrinsic", ORE, TheLoop, CI);
986 return false;
987 }
988 }
989 }
990
991 // If we found a vectorized variant of a function, note that so LV can
992 // make better decisions about maximum VF.
993 if (CI && !VFDatabase::getMappings(*CI).empty())
994 VecCallVariantsFound = true;
995
996 auto CanWidenInstructionTy = [](Instruction const &Inst) {
997 Type *InstTy = Inst.getType();
998 if (!isa<StructType>(InstTy))
999 return canVectorizeTy(InstTy);
1000
1001 // For now, we only recognize struct values returned from calls where
1002 // all users are extractvalue as vectorizable. All element types of the
1003 // struct must be types that can be widened.
1004 return isa<CallInst>(Inst) && canWidenCallReturnType(InstTy) &&
1005 all_of(Inst.users(), IsaPred<ExtractValueInst>);
1006 };
1007
1008 // Check that the instruction return type is vectorizable.
1009 // We can't vectorize casts from vector type to scalar type.
1010 // Also, we can't vectorize extractelement instructions.
1011 if (!CanWidenInstructionTy(I) ||
1012 (isa<CastInst>(I) &&
1013 !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
1014 isa<ExtractElementInst>(I)) {
1015 reportVectorizationFailure("Found unvectorizable type",
1016 "instruction return type cannot be vectorized",
1017 "CantVectorizeInstructionReturnType", ORE,
1018 TheLoop, &I);
1019 return false;
1020 }
1021
1022 // Check that the stored type is vectorizable.
1023 if (auto *ST = dyn_cast<StoreInst>(&I)) {
1024 Type *T = ST->getValueOperand()->getType();
1025 if (!VectorType::isValidElementType(T)) {
1026 reportVectorizationFailure("Store instruction cannot be vectorized",
1027 "CantVectorizeStore", ORE, TheLoop, ST);
1028 return false;
1029 }
1030
1031 // For nontemporal stores, check that a nontemporal vector version is
1032 // supported on the target.
1033 if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
1034 // Arbitrarily try a vector of 2 elements.
1035 auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
1036 assert(VecTy && "did not find vectorized version of stored type");
1037 if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
1038 reportVectorizationFailure(
1039 "nontemporal store instruction cannot be vectorized",
1040 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
1041 return false;
1042 }
1043 }
1044
1045 } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
1046 if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
1047 // For nontemporal loads, check that a nontemporal vector version is
1048 // supported on the target (arbitrarily try a vector of 2 elements).
1049 auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
1050 assert(VecTy && "did not find vectorized version of load type");
1051 if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
1052 reportVectorizationFailure(
1053 "nontemporal load instruction cannot be vectorized",
1054 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
1055 return false;
1056 }
1057 }
1058
1059 // FP instructions can allow unsafe algebra, thus vectorizable by
1060 // non-IEEE-754 compliant SIMD units.
1061 // This applies to floating-point math operations and calls, not memory
1062 // operations, shuffles, or casts, as they don't change precision or
1063 // semantics.
1064 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
1065 !I.isFast()) {
1066 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
1067 Hints->setPotentiallyUnsafe();
1068 }
1069
1070 // Reduction instructions are allowed to have exit users.
1071 // All other instructions must not have external users.
1072 if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
1073 // We can safely vectorize loops where instructions within the loop are
1074 // used outside the loop only if the SCEV predicates within the loop are
1075 // the same as outside the loop. Allowing the exit means reusing the SCEV
1076 // outside the loop.
1077 if (PSE.getPredicate().isAlwaysTrue()) {
1078 AllowedExit.insert(&I);
1079 return true;
1080 }
1081 reportVectorizationFailure("Value cannot be used outside the loop",
1082 "ValueUsedOutsideLoop", ORE, TheLoop, &I);
1083 return false;
1084 }
1085
1086 return true;
1087}
1088
1089/// Find histogram operations that match high-level code in loops:
1090/// \code
1091/// buckets[indices[i]]+=step;
1092/// \endcode
1093///
1094/// It matches a pattern starting from \p HSt, which Stores to the 'buckets'
1095/// array the computed histogram. It uses a BinOp to sum all counts, storing
1096/// them using a loop-variant index Load from the 'indices' input array.
1097///
1098/// On successful matches it updates the STATISTIC 'HistogramsDetected',
1099/// regardless of hardware support. When there is support, it additionally
1100/// stores the BinOp/Load pairs in \p Histograms, as well as the pointers
1101/// used to update the histogram.
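///
/// A minimal IR sketch of the matched pattern (illustrative only):
/// \code
///   %idx = load i64, ptr %indices.gep          ; loop-varying index
///   %bucket = getelementptr i32, ptr %buckets, i64 %idx
///   %old = load i32, ptr %bucket
///   %new = add i32 %old, 1
///   store i32 %new, ptr %bucket                ; \p HSt
/// \endcode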
1102static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
1103 const PredicatedScalarEvolution &PSE,
1104 SmallVectorImpl<HistogramInfo> &Histograms) {
1105
1106 // Store value must come from a Binary Operation.
1107 Instruction *HPtrInstr = nullptr;
1108 BinaryOperator *HBinOp = nullptr;
1109 if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr))))
1110 return false;
1111
1112 // BinOp must be an Add or a Sub modifying the bucket value by a
1113 // loop invariant amount.
1114 // FIXME: We assume the loop invariant term is on the RHS.
1115 // Fine for an immediate/constant, but maybe not a generic value?
1116 Value *HIncVal = nullptr;
1117 if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
1118 !match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))))
1119 return false;
1120
1121 // Make sure the increment value is loop invariant.
1122 if (!TheLoop->isLoopInvariant(HIncVal))
1123 return false;
1124
1125 // The address to store is calculated through a GEP Instruction.
1126 GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(HPtrInstr);
1127 if (!GEP)
1128 return false;
1129
1130 // Restrict address calculation to constant indices except for the last term.
1131 Value *HIdx = nullptr;
1132 for (Value *Index : GEP->indices()) {
1133 if (HIdx)
1134 return false;
1135 if (!isa<ConstantInt>(Index))
1136 HIdx = Index;
1137 }
1138
1139 if (!HIdx)
1140 return false;
1141
1142 // Check that the index is calculated by loading from another array. Ignore
1143 // any extensions.
1144 // FIXME: Support indices from other sources than a linear load from memory?
1145 // We're currently trying to match an operation looping over an array
1146 // of indices, but there could be additional levels of indirection
1147 // in place, or possibly some additional calculation to form the index
1148 // from the loaded data.
1149 Value *VPtrVal;
1150 if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Load(m_Value(VPtrVal)))))
1151 return false;
1152
1153 // Make sure the index address varies in this loop, not an outer loop.
1154 const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(VPtrVal));
1155 if (!AR || AR->getLoop() != TheLoop)
1156 return false;
1157
1158 // Ensure we'll have the same mask by checking that all parts of the histogram
1159 // (gather load, update, scatter store) are in the same block.
1160 LoadInst *IndexedLoad = cast<LoadInst>(HBinOp->getOperand(0));
1161 BasicBlock *LdBB = IndexedLoad->getParent();
1162 if (LdBB != HBinOp->getParent() || LdBB != HSt->getParent())
1163 return false;
1164
1165 LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n");
1166
1167 // Store the operations that make up the histogram.
1168 Histograms.emplace_back(IndexedLoad, HBinOp, HSt);
1169 return true;
1170}
1171
1172bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
1173 // For now, we only support an IndirectUnsafe dependency that calculates
1174 // a histogram.
1175 if (!EnableHistogramVectorization)
1176 return false;
1177
1178 // Find a single IndirectUnsafe dependency.
1179 const MemoryDepChecker::Dependence *IUDep = nullptr;
1180 const MemoryDepChecker &DepChecker = LAI->getDepChecker();
1181 const auto *Deps = DepChecker.getDependences();
1182 // If there were too many dependences, LAA abandons recording them. We can't
1183 // proceed safely if we don't know what the dependences are.
1184 if (!Deps)
1185 return false;
1186
1187 for (const MemoryDepChecker::Dependence &Dep : *Deps) {
1188 // Ignore dependencies that are either known to be safe or can be
1189 // checked at runtime.
1190 if (MemoryDepChecker::Dependence::isSafeForVectorization(Dep.Type) !=
1191 MemoryDepChecker::VectorizationSafetyStatus::Unsafe)
1192 continue;
1193
1194 // We're only interested in IndirectUnsafe dependencies here, where the
1195 // address might come from a load from memory. We also only want to handle
1196 // one such dependency, at least for now.
1197 if (Dep.Type != MemoryDepChecker::Dependence::IndirectUnsafe || IUDep)
1198 return false;
1199
1200 IUDep = &Dep;
1201 }
1202 if (!IUDep)
1203 return false;
1204
1205 // For now only normal loads and stores are supported.
1206 LoadInst *LI = dyn_cast<LoadInst>(IUDep->getSource(DepChecker));
1207 StoreInst *SI = dyn_cast<StoreInst>(IUDep->getDestination(DepChecker));
1208
1209 if (!LI || !SI)
1210 return false;
1211
1212 LLVM_DEBUG(dbgs() << "LV: Checking for a histogram on: " << *SI << "\n");
1213 return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);
1214}
1215
1216bool LoopVectorizationLegality::canVectorizeMemory() {
1217 LAI = &LAIs.getInfo(*TheLoop);
1218 const OptimizationRemarkAnalysis *LAR = LAI->getReport();
1219 if (LAR) {
1220 ORE->emit([&]() {
1221 return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
1222 "loop not vectorized: ", *LAR);
1223 });
1224 }
1225
1226 if (!LAI->canVectorizeMemory())
1227 return canVectorizeIndirectUnsafeDependences();
1228
1230 reportVectorizationFailure("We don't allow storing to uniform addresses",
1231 "write to a loop invariant address could not "
1232 "be vectorized",
1233 "CantVectorizeStoreToLoopInvariantAddress", ORE,
1234 TheLoop);
1235 return false;
1236 }
1237
1238 // We can vectorize stores to invariant address when final reduction value is
1239 // guaranteed to be stored at the end of the loop. Also, if decision to
1240 // vectorize loop is made, runtime checks are added so as to make sure that
1241 // invariant address won't alias with any other objects.
1242 if (!LAI->getStoresToInvariantAddresses().empty()) {
1243 // For each invariant address, check if last stored value is unconditional
1244 // and the address is not calculated inside the loop.
1245 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1246 if (!isInvariantStoreOfReduction(SI))
1247 continue;
1248
1249 if (blockNeedsPredication(SI->getParent())) {
1250 reportVectorizationFailure(
1251 "We don't allow storing to uniform addresses",
1252 "write of conditional recurring variant value to a loop "
1253 "invariant address could not be vectorized",
1254 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1255 return false;
1256 }
1257
1258 // Invariant address should be defined outside of loop. LICM pass usually
1259 // makes sure it happens, but in rare cases it does not; we do not want
1260 // to overcomplicate vectorization to support this case.
1261 if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
1262 if (TheLoop->contains(Ptr)) {
1263 reportVectorizationFailure(
1264 "Invariant address is calculated inside the loop",
1265 "write to a loop invariant address could not "
1266 "be vectorized",
1267 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1268 return false;
1269 }
1270 }
1271 }
1272
1273 if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
1274 // For each invariant address, check its last stored value is the result
1275 // of one of our reductions.
1276 //
1277 // We do not check if dependence with loads exists because that is already
1278 // checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
1279 ScalarEvolution *SE = PSE.getSE();
1280 SmallVector<StoreInst *, 4> UnhandledStores;
1281 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1282 if (isInvariantStoreOfReduction(SI)) {
1283 // Earlier stores to this address are effectively deadcode.
1284 // With opaque pointers it is possible for one pointer to be used with
1285 // different sizes of stored values:
1286 // store i32 0, ptr %x
1287 // store i8 0, ptr %x
1288 // The latest store doesn't completely overwrite the first one in the
1289 // example. That is why we have to make sure that types of stored
1290 // values are same.
1291 // TODO: Check that bitwidth of unhandled store is smaller than the
1292 // one that overwrites it and add a test.
1293 erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
1294 return storeToSameAddress(SE, SI, I) &&
1295 I->getValueOperand()->getType() ==
1296 SI->getValueOperand()->getType();
1297 });
1298 continue;
1299 }
1300 UnhandledStores.push_back(SI);
1301 }
1302
1303 bool IsOK = UnhandledStores.empty();
1304 // TODO: we should also validate against InvariantMemSets.
1305 if (!IsOK) {
1306 reportVectorizationFailure(
1307 "We don't allow storing to uniform addresses",
1308 "write to a loop invariant address could not "
1309 "be vectorized",
1310 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1311 return false;
1312 }
1313 }
1314 }
1315
1316 PSE.addPredicate(LAI->getPSE().getPredicate());
1317 return true;
1318}
1319
1320bool LoopVectorizationLegality::canVectorizeFPMath(
1321 bool EnableStrictReductions) {
1322
1323 // First check if there is any ExactFP math or if we allow reassociations
1324 if (!Requirements->getExactFPInst() || Hints->allowReordering())
1325 return true;
1326
1327 // If the above is false, we have ExactFPMath & do not allow reordering.
1328 // If the EnableStrictReductions flag is set, first check if we have any
1329 // Exact FP induction vars, which we cannot vectorize.
1330 if (!EnableStrictReductions ||
1331 any_of(getInductionVars(), [&](auto &Induction) -> bool {
1332 InductionDescriptor IndDesc = Induction.second;
1333 return IndDesc.getExactFPMathInst();
1334 }))
1335 return false;
1336
1337 // We can now only vectorize if all reductions with Exact FP math also
1338 // have the isOrdered flag set, which indicates that we can move the
1339 // reduction operations in-loop.
1340 return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
1341 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1342 return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
1343 }));
1344}
1345
1346bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
1347 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1348 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1349 return RdxDesc.IntermediateStore == SI;
1350 });
1351}
1352
1353bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) {
1354 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1355 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1356 if (!RdxDesc.IntermediateStore)
1357 return false;
1358
1359 ScalarEvolution *SE = PSE.getSE();
1360 Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
1361 return V == InvariantAddress ||
1362 SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
1363 });
1364}
1365
1366bool LoopVectorizationLegality::isInductionPhi(const Value *V) const {
1367 Value *In0 = const_cast<Value *>(V);
1368 PHINode *PN = dyn_cast_or_null<PHINode>(In0);
1369 if (!PN)
1370 return false;
1371
1372 return Inductions.count(PN);
1373}
1374
1375const InductionDescriptor *
1376LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const {
1377 if (!isInductionPhi(Phi))
1378 return nullptr;
1379 auto &ID = getInductionVars().find(Phi)->second;
1380 if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
1381 ID.getKind() == InductionDescriptor::IK_FpInduction)
1382 return &ID;
1383 return nullptr;
1384}
1385
1386const InductionDescriptor *
1387LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const {
1388 if (!isInductionPhi(Phi))
1389 return nullptr;
1390 auto &ID = getInductionVars().find(Phi)->second;
1391 if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
1392 return &ID;
1393 return nullptr;
1394}
1395
1396bool LoopVectorizationLegality::isCastedInductionVariable(
1397 const Value *V) const {
1398 auto *Inst = dyn_cast<Instruction>(V);
1399 return (Inst && InductionCastsToIgnore.count(Inst));
1400}
1401
1402bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
1403 return isInductionPhi(V) || isCastedInductionVariable(V);
1404}
1405
1406bool LoopVectorizationLegality::isFixedOrderRecurrence(
1407 const PHINode *Phi) const {
1408 return FixedOrderRecurrences.count(Phi);
1409}
1410
1411bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
1412 // When vectorizing early exits, create predicates for the latch block only.
1413 // The early exiting block must be a direct predecessor of the latch at the
1414 // moment.
1415 BasicBlock *Latch = TheLoop->getLoopLatch();
1416 if (hasUncountableEarlyExit()) {
1417 assert(
1418 is_contained(predecessors(Latch), getUncountableEarlyExitingBlock()) &&
1419 "Uncountable exiting block must be a direct predecessor of latch");
1420 return BB == Latch;
1421 }
1422 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
1423}
1424
1425bool LoopVectorizationLegality::blockCanBePredicated(
1426 BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
1427 SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
1428 for (Instruction &I : *BB) {
1429 // We can predicate blocks with calls to assume, as long as we drop them in
1430 // case we flatten the CFG via predication.
1431 if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
1432 MaskedOp.insert(&I);
1433 continue;
1434 }
1435
1436 // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
1437 // TODO: there might be cases that it should block the vectorization. Let's
1438 // ignore those for now.
1439 if (isa<NoAliasScopeDeclInst>(&I))
1440 continue;
1441
1442 // We can allow masked calls if there's at least one vector variant, even
1443 // if we end up scalarizing due to the cost model calculations.
1444 // TODO: Allow other calls if they have appropriate attributes... readonly
1445 // and argmemonly?
1446 if (CallInst *CI = dyn_cast<CallInst>(&I))
1447 if (VFDatabase::hasMaskedVariant(*CI)) {
1448 MaskedOp.insert(CI);
1449 continue;
1450 }
1451
1452 // Loads are handled via masking (or speculated if safe to do so.)
1453 if (auto *LI = dyn_cast<LoadInst>(&I)) {
1454 if (!SafePtrs.count(LI->getPointerOperand()))
1455 MaskedOp.insert(LI);
1456 continue;
1457 }
1458
1459 // Predicated store requires some form of masking:
1460 // 1) masked store HW instruction,
1461 // 2) emulation via load-blend-store (only if safe and legal to do so,
1462 // be aware on the race conditions), or
1463 // 3) element-by-element predicate check and scalar store.
1464 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1465 MaskedOp.insert(SI);
1466 continue;
1467 }
1468
1469 if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
1470 return false;
1471 }
1472
1473 return true;
1474}
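
// For example (an illustrative sketch), in a loop body such as
//
//   if (cond[i])
//     a[i] = x;
//
// the conditional store is recorded in MaskedOp and, once the block is
// flattened by if-conversion, is emitted using one of the masking strategies
// listed above.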
1475
1476bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1477 if (!EnableIfConversion) {
1478 reportVectorizationFailure("If-conversion is disabled",
1479 "IfConversionDisabled", ORE, TheLoop);
1480 return false;
1481 }
1482
1483 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
1484
1485 // A list of pointers which are known to be dereferenceable within scope of
1486 // the loop body for each iteration of the loop which executes. That is,
1487 // the memory pointed to can be dereferenced (with the access size implied by
1488 // the value's type) unconditionally within the loop header without
1489 // introducing a new fault.
1490 SmallPtrSet<Value *, 8> SafePointers;
1491
1492 // Collect safe addresses.
1493 for (BasicBlock *BB : TheLoop->blocks()) {
1494 if (!blockNeedsPredication(BB)) {
1495 for (Instruction &I : *BB)
1496 if (auto *Ptr = getLoadStorePointerOperand(&I))
1497 SafePointers.insert(Ptr);
1498 continue;
1499 }
1500
1501 // For a block which requires predication, an address may be safe to access
1502 // in the loop w/o predication if we can prove dereferenceability facts
1503 // sufficient to ensure it'll never fault within the loop. For the moment,
1504 // we restrict this to loads; stores are more complicated due to
1505 // concurrency restrictions.
1506 ScalarEvolution &SE = *PSE.getSE();
1507 SmallVector<const SCEVPredicate *, 4> Predicates;
1508 for (Instruction &I : *BB) {
1509 LoadInst *LI = dyn_cast<LoadInst>(&I);
1510
1511 // Make sure we can execute all computations feeding into Ptr in the loop
1512 // w/o triggering UB and that none of the out-of-loop operands are poison.
1513 // We do not need to check if operations inside the loop can produce
1514 // poison due to flags (e.g. due to an inbounds GEP going out of bounds),
1515 // because flags will be dropped when executing them unconditionally.
1516 // TODO: Results could be improved by considering poison-propagation
1517 // properties of visited ops.
1518 auto CanSpeculatePointerOp = [this](Value *Ptr) {
1519 SmallVector<Value *> Worklist = {Ptr};
1520 SmallPtrSet<Value *, 4> Visited;
1521 while (!Worklist.empty()) {
1522 Value *CurrV = Worklist.pop_back_val();
1523 if (!Visited.insert(CurrV).second)
1524 continue;
1525
1526 auto *CurrI = dyn_cast<Instruction>(CurrV);
1527 if (!CurrI || !TheLoop->contains(CurrI)) {
1528 // If operands from outside the loop may be poison then Ptr may also
1529 // be poison.
1530 if (!isGuaranteedNotToBePoison(CurrV, AC,
1531 TheLoop->getLoopPredecessor()
1532 ->getTerminator()
1533 ->getIterator()))
1534 return false;
1535 continue;
1536 }
1537
1538 // A loaded value may be poison, independent of any flags.
1539 if (isa<LoadInst>(CurrI) && !isGuaranteedNotToBePoison(CurrV, AC))
1540 return false;
1541
1542 // For other ops, assume poison can only be introduced via flags,
1543 // which can be dropped.
1544 if (!isa<PHINode>(CurrI) && !isSafeToSpeculativelyExecute(CurrI))
1545 return false;
1546 append_range(Worklist, CurrI->operands());
1547 }
1548 return true;
1549 };
1550 // Pass the Predicates pointer to isDereferenceableAndAlignedInLoop so
1551 // that it will consider loops that need guarding by SCEV checks. The
1552 // vectoriser will generate these checks if we decide to vectorise.
1553 if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1554 CanSpeculatePointerOp(LI->getPointerOperand()) &&
1555 isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC,
1556 &Predicates))
1557 SafePointers.insert(LI->getPointerOperand());
1558 Predicates.clear();
1559 }
1560 }
1561
1562 // Collect the blocks that need predication.
1563 for (BasicBlock *BB : TheLoop->blocks()) {
1564 // We support only branches and switch statements as terminators inside the
1565 // loop.
1566 if (isa<SwitchInst>(BB->getTerminator())) {
1567 if (TheLoop->isLoopExiting(BB)) {
1568 reportVectorizationFailure("Loop contains an unsupported switch",
1569 "LoopContainsUnsupportedSwitch", ORE,
1570 TheLoop, BB->getTerminator());
1571 return false;
1572 }
1573 } else if (!isa<BranchInst>(BB->getTerminator())) {
1574 reportVectorizationFailure("Loop contains an unsupported terminator",
1575 "LoopContainsUnsupportedTerminator", ORE,
1576 TheLoop, BB->getTerminator());
1577 return false;
1578 }
1579
1580 // We must be able to predicate all blocks that need to be predicated.
1581 if (blockNeedsPredication(BB) &&
1582 !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
1583 reportVectorizationFailure(
1584 "Control flow cannot be substituted for a select", "NoCFGForSelect",
1585 ORE, TheLoop, BB->getTerminator());
1586 return false;
1587 }
1588 }
1589
1590 // We can if-convert this loop.
1591 return true;
1592}
1593
1594// Helper function to canVectorizeLoopNestCFG.
1595bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1596 bool UseVPlanNativePath) {
1597 assert((UseVPlanNativePath || Lp->isInnermost()) &&
1598 "VPlan-native path is not enabled.");
1599
1600 // TODO: ORE should be improved to show more accurate information when an
1601 // outer loop can't be vectorized because a nested loop is not understood or
1602 // legal. Something like: "outer_loop_location: loop not vectorized:
1603 // (inner_loop_location) loop control flow is not understood by vectorizer".
1604
1605 // Store the result and return it at the end instead of exiting early, in case
1606 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1607 bool Result = true;
1608 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1609
1610 // We must have a loop in canonical form. Loops with indirectbr in them cannot
1611 // be canonicalized.
1612 if (!Lp->getLoopPreheader()) {
1613 reportVectorizationFailure("Loop doesn't have a legal pre-header",
1614 "loop control flow is not understood by vectorizer",
1615 "CFGNotUnderstood", ORE, TheLoop);
1616 if (DoExtraAnalysis)
1617 Result = false;
1618 else
1619 return false;
1620 }
1621
1622 // We must have a single backedge.
1623 if (Lp->getNumBackEdges() != 1) {
1624 reportVectorizationFailure("The loop must have a single backedge",
1625 "loop control flow is not understood by vectorizer",
1626 "CFGNotUnderstood", ORE, TheLoop);
1627 if (DoExtraAnalysis)
1628 Result = false;
1629 else
1630 return false;
1631 }
1632
1633 return Result;
1634}
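// As an illustrative example, a loop already in the canonical form required
// above might come from source such as
//
//   int i = 0;
//   do {          // header reached from a dedicated pre-header
//     a[i] = 0;
//   } while (++i < n);   // exactly one backedge to the header
//
// whereas a loop whose header is reached via an indirectbr cannot be put
// into this shape and is rejected by these checks.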
1635
1636bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1637 Loop *Lp, bool UseVPlanNativePath) {
1638 // Store the result and return it at the end instead of exiting early, in case
1639 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1640 bool Result = true;
1641 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1642 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1643 if (DoExtraAnalysis)
1644 Result = false;
1645 else
1646 return false;
1647 }
1648
1649 // Recursively check whether the loop control flow of nested loops is
1650 // understood.
1651 for (Loop *SubLp : *Lp)
1652 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1653 if (DoExtraAnalysis)
1654 Result = false;
1655 else
1656 return false;
1657 }
1658
1659 return Result;
1660}
1661
1662bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1663 BasicBlock *LatchBB = TheLoop->getLoopLatch();
1664 if (!LatchBB) {
1665 reportVectorizationFailure("Loop does not have a latch",
1666 "Cannot vectorize early exit loop",
1667 "NoLatchEarlyExit", ORE, TheLoop);
1668 return false;
1669 }
1670
1671 if (Reductions.size() || FixedOrderRecurrences.size()) {
1672 reportVectorizationFailure(
1673 "Found reductions or recurrences in early-exit loop",
1674 "Cannot vectorize early exit loop with reductions or recurrences",
1675 "RecurrencesInEarlyExitLoop", ORE, TheLoop);
1676 return false;
1677 }
1678
1679 SmallVector<BasicBlock *, 8> ExitingBlocks;
1680 TheLoop->getExitingBlocks(ExitingBlocks);
1681
1682 // Keep a record of all the exiting blocks.
1683 SmallVector<const SCEVPredicate *, 4> Predicates;
1684 BasicBlock *SingleUncountableExitingBlock = nullptr;
1685 for (BasicBlock *BB : ExitingBlocks) {
1686 const SCEV *EC =
1687 PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
1688 if (isa<SCEVCouldNotCompute>(EC)) {
1689 if (size(successors(BB)) != 2) {
1690 reportVectorizationFailure(
1691 "Early exiting block does not have exactly two successors",
1692 "Incorrect number of successors from early exiting block",
1693 "EarlyExitTooManySuccessors", ORE, TheLoop);
1694 return false;
1695 }
1696
1697 if (SingleUncountableExitingBlock) {
1698 reportVectorizationFailure(
1699 "Loop has too many uncountable exits",
1700 "Cannot vectorize early exit loop with more than one early exit",
1701 "TooManyUncountableEarlyExits", ORE, TheLoop);
1702 return false;
1703 }
1704
1705 SingleUncountableExitingBlock = BB;
1706 } else
1707 CountableExitingBlocks.push_back(BB);
1708 }
1709 // We can safely ignore the predicates here because when vectorizing the loop
1710 // the PredicatedScalarEvolution class will keep track of all predicates
1711 // for each exiting block anyway. This happens when calling
1712 // PSE.getSymbolicMaxBackedgeTakenCount() below.
1713 Predicates.clear();
1714
1715 if (!SingleUncountableExitingBlock) {
1716 LLVM_DEBUG(dbgs() << "LV: Could not find any uncountable exits");
1717 return false;
1718 }
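// By way of example (placeholder names), the shape handled below is a
// classic search loop:
//
//   for (int i = 0; i < n; ++i)   // countable exit in the latch (i < n)
//     if (a[i] == key)            // single uncountable early exit
//       break;
//
// where the early-exiting block is the unique predecessor of the latch, the
// latch exit count is computable, and the loop only reads memory.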
1719
1720 // The only supported early exit loops so far are ones where the early
1721 // exiting block is a unique predecessor of the latch block.
1722 BasicBlock *LatchPredBB = LatchBB->getUniquePredecessor();
1723 if (LatchPredBB != SingleUncountableExitingBlock) {
1724 reportVectorizationFailure("Early exit is not the latch predecessor",
1725 "Cannot vectorize early exit loop",
1726 "EarlyExitNotLatchPredecessor", ORE, TheLoop);
1727 return false;
1728 }
1729
1730 // The latch block must have a countable exit.
1731 if (isa<SCEVCouldNotCompute>(
1732 PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
1733 reportVectorizationFailure(
1734 "Cannot determine exact exit count for latch block",
1735 "Cannot vectorize early exit loop",
1736 "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1737 return false;
1738 }
1739 assert(llvm::is_contained(CountableExitingBlocks, LatchBB) &&
1740 "Latch block not found in list of countable exits!");
1741
1742 // Check to see if there are instructions that could potentially generate
1743 // exceptions or have side-effects.
1744 auto IsSafeOperation = [](Instruction *I) -> bool {
1745 switch (I->getOpcode()) {
1746 case Instruction::Load:
1747 case Instruction::Store:
1748 case Instruction::PHI:
1749 case Instruction::Br:
1750 // These are checked separately.
1751 return true;
1752 default:
1753 return isSafeToSpeculativelyExecute(I);
1754 }
1755 };
1756
1757 for (auto *BB : TheLoop->blocks())
1758 for (auto &I : *BB) {
1759 if (I.mayWriteToMemory()) {
1760 // We don't support writes to memory.
1761 reportVectorizationFailure(
1762 "Writes to memory unsupported in early exit loops",
1763 "Cannot vectorize early exit loop with writes to memory",
1764 "WritesInEarlyExitLoop", ORE, TheLoop);
1765 return false;
1766 } else if (!IsSafeOperation(&I)) {
1767 reportVectorizationFailure("Early exit loop contains operations that "
1768 "cannot be speculatively executed",
1769 "UnsafeOperationsEarlyExitLoop", ORE,
1770 TheLoop);
1771 return false;
1772 }
1773 }
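// For illustration only, an early-exit loop that also writes memory, e.g.
//
//   for (int i = 0; i < n; ++i) {
//     b[i] = a[i];
//     if (a[i] == key)
//       break;
//   }
//
// is rejected by the checks above, since stores are not yet supported in
// early-exit loops.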
1774
1775 // The vectoriser cannot handle loads that occur after the early exit block.
1776 assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock &&
1777 "Expected latch predecessor to be the early exiting block");
1778
1779 // TODO: Handle loops that may fault.
1780 Predicates.clear();
1781 if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
1782 &Predicates)) {
1783 reportVectorizationFailure(
1784 "Loop may fault",
1785 "Cannot vectorize potentially faulting early exit loop",
1786 "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1787 return false;
1788 }
1789
1790 [[maybe_unused]] const SCEV *SymbolicMaxBTC =
1791 PSE.getSymbolicMaxBackedgeTakenCount();
1792 // Since we have an exact exit count for the latch and the early exit
1793 // dominates the latch, then this should guarantee a computed SCEV value.
1794 assert(!isa<SCEVCouldNotCompute>(SymbolicMaxBTC) &&
1795 "Failed to get symbolic expression for backedge taken count");
1796 LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max "
1797 "backedge taken count: "
1798 << *SymbolicMaxBTC << '\n');
1799 UncountableExitingBB = SingleUncountableExitingBlock;
1800 return true;
1801}
1802
1803bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1804 // Store the result and return it at the end instead of exiting early, in case
1805 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1806 bool Result = true;
1807
1808 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1809 // Check whether the loop-related control flow in the loop nest is expected by
1810 // vectorizer.
1811 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1812 if (DoExtraAnalysis) {
1813 LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest");
1814 Result = false;
1815 } else {
1816 return false;
1817 }
1818 }
1819
1820 // We need to have a loop header.
1821 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1822 << '\n');
1823
1824 // Specific checks for outer loops. We skip the remaining legal checks at this
1825 // point because they don't support outer loops.
1826 if (!TheLoop->isInnermost()) {
1827 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1828
1829 if (!canVectorizeOuterLoop()) {
1830 reportVectorizationFailure("Unsupported outer loop",
1831 "UnsupportedOuterLoop", ORE, TheLoop);
1832 // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1833 // outer loops.
1834 return false;
1835 }
1836
1837 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1838 return Result;
1839 }
1840
1841 assert(TheLoop->isInnermost() && "Inner loop expected.");
1842 // Check if we can if-convert non-single-bb loops.
1843 unsigned NumBlocks = TheLoop->getNumBlocks();
1844 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1845 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1846 if (DoExtraAnalysis)
1847 Result = false;
1848 else
1849 return false;
1850 }
1851
1852 // Check if we can vectorize the instructions and CFG in this loop.
1853 if (!canVectorizeInstrs()) {
1854 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1855 if (DoExtraAnalysis)
1856 Result = false;
1857 else
1858 return false;
1859 }
1860
1861 if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
1862 if (TheLoop->getExitingBlock()) {
1863 reportVectorizationFailure("Cannot vectorize uncountable loop",
1864 "UnsupportedUncountableLoop", ORE, TheLoop);
1865 if (DoExtraAnalysis)
1866 Result = false;
1867 else
1868 return false;
1869 } else {
1870 if (!isVectorizableEarlyExitLoop()) {
1871 assert(!hasUncountableEarlyExit() &&
1872 "Must be false without vectorizable early-exit loop");
1873 if (DoExtraAnalysis)
1874 Result = false;
1875 else
1876 return false;
1877 }
1878 }
1879 }
1880
1881 // Go over each instruction and look at memory deps.
1882 if (!canVectorizeMemory()) {
1883 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1884 if (DoExtraAnalysis)
1885 Result = false;
1886 else
1887 return false;
1888 }
1889
1890 if (Result) {
1891 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1892 << (LAI->getRuntimePointerChecking()->Need
1893 ? " (with a runtime bound check)"
1894 : "")
1895 << "!\n");
1896 }
1897
1898 unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1899 if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1900 SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
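// As a worked example, with the default option values this allows at most
// 16 runtime SCEV checks for an ordinary loop, while a loop annotated with
//
//   #pragma clang loop vectorize(enable)
//
// (which sets the force hint to FK_Enabled) gets the larger budget of 128
// checks before vectorization is given up below as needing too many checks.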
1901
1902 if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
1903 LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "
1904 "due to SCEVThreshold");
1905 reportVectorizationFailure("Too many SCEV checks needed",
1906 "Too many SCEV assumptions need to be made and checked at runtime",
1907 "TooManySCEVRunTimeChecks", ORE, TheLoop);
1908 if (DoExtraAnalysis)
1909 Result = false;
1910 else
1911 return false;
1912 }
1913
1914 // Okay! We've done all the tests. If any have failed, return false. Otherwise
1915 // we can vectorize, and at this point we don't have any other mem analysis
1916 // which may limit our maximum vectorization factor, so just return true with
1917 // no restrictions.
1918 return Result;
1919}
1920
1921bool LoopVectorizationLegality::canFoldTailByMasking() const {
1922 // The only loops we can vectorize without a scalar epilogue, are loops with
1923 // a bottom-test and a single exiting block. We'd have to handle the fact
1924 // that not every instruction executes on the last iteration. This will
1925 // require a lane mask which varies through the vector loop body. (TODO)
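// For illustration only, with a trip count of 10 and VF = 4, folding the
// tail by masking runs three vector iterations covering i = 0..3, 4..7 and
// 8..11, with the lanes for i = 10 and 11 disabled by the mask in the last
// iteration instead of being handled by a scalar epilogue.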
1926 if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1927 LLVM_DEBUG(
1928 dbgs()
1929 << "LV: Cannot fold tail by masking. Requires a single latch exit\n");
1930 return false;
1931 }
1932
1933 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1934
1935 SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1936
1937 for (const auto &Reduction : getReductionVars())
1938 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1939
1940 // TODO: handle non-reduction outside users when tail is folded by masking.
1941 for (auto *AE : AllowedExit) {
1942 // Check that all users of allowed exit values are inside the loop or
1943 // are the live-out of a reduction.
1944 if (ReductionLiveOuts.count(AE))
1945 continue;
1946 for (User *U : AE->users()) {
1947 Instruction *UI = cast<Instruction>(U);
1948 if (TheLoop->contains(UI))
1949 continue;
1950 LLVM_DEBUG(
1951 dbgs()
1952 << "LV: Cannot fold tail by masking, loop has an outside user for "
1953 << *UI << "\n");
1954 return false;
1955 }
1956 }
1957
1958 for (const auto &Entry : getInductionVars()) {
1959 PHINode *OrigPhi = Entry.first;
1960 for (User *U : OrigPhi->users()) {
1961 auto *UI = cast<Instruction>(U);
1962 if (!TheLoop->contains(UI)) {
1963 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop IV has an "
1964 "outside user for "
1965 << *UI << "\n");
1966 return false;
1967 }
1968 }
1969 }
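// For illustration only, a loop whose induction variable is live out, e.g.
//
//   int i = 0;
//   for (; i < n; ++i)
//     a[i] = 0;
//   use(i);   // final IV value used after the loop
//
// is rejected for tail folding here; the live-out value would have to be
// recovered from a partially masked final iteration, which is not handled.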
1970
1971 // The list of pointers that we can safely read and write to remains empty.
1972 SmallPtrSet<Value *, 8> SafePointers;
1973
1974 // Check all blocks for predication, including those that ordinarily do not
1975 // need predication such as the header block.
1976 SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
1977 for (BasicBlock *BB : TheLoop->blocks()) {
1978 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
1979 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
1980 return false;
1981 }
1982 }
1983
1984 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1985
1986 return true;
1987}
1988
1989void LoopVectorizationLegality::prepareToFoldTailByMasking() {
1990 // The list of pointers that we can safely read and write to remains empty.
1991 SmallPtrSet<Value *, 8> SafePointers;
1992
1993 // Mark all blocks for predication, including those that ordinarily do not
1994 // need predication such as the header block.
1995 for (BasicBlock *BB : TheLoop->blocks()) {
1996 [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
1997 assert(R && "Must be able to predicate block when tail-folding.");
1998 }
1999}
2000
2001} // namespace llvm