LoopAccessAnalysis.cpp
1//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The implementation of the loop memory dependence analysis that was
10// originally developed for the loop vectorizer.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SetVector.h"
22#include "llvm/ADT/SmallSet.h"
40#include "llvm/IR/BasicBlock.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugLoc.h"
46#include "llvm/IR/Dominators.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/InstrTypes.h"
49#include "llvm/IR/Instruction.h"
52#include "llvm/IR/PassManager.h"
53#include "llvm/IR/Type.h"
54#include "llvm/IR/Value.h"
55#include "llvm/IR/ValueHandle.h"
58#include "llvm/Support/Debug.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <utility>
66#include <variant>
67#include <vector>
68
69using namespace llvm;
70using namespace llvm::SCEVPatternMatch;
71
72#define DEBUG_TYPE "loop-accesses"
73
75VectorizationFactor("force-vector-width", cl::Hidden,
76 cl::desc("Sets the SIMD width. Zero is autoselect."),
79
81VectorizationInterleave("force-vector-interleave", cl::Hidden,
82 cl::desc("Sets the vectorization interleave count. "
83 "Zero is autoselect."),
87
89 "runtime-memory-check-threshold", cl::Hidden,
90 cl::desc("When performing memory disambiguation checks at runtime do not "
91 "generate more than this number of comparisons (default = 8)."),
94
95/// The maximum number of iterations used to merge memory checks.
97 "memory-check-merge-threshold", cl::Hidden,
98 cl::desc("Maximum number of comparisons done when trying to merge "
99 "runtime memory checks. (default = 100)"),
100 cl::init(100));
101
102/// Maximum SIMD width.
103const unsigned VectorizerParams::MaxVectorWidth = 64;
104
105/// We collect dependences up to this threshold.
107 MaxDependences("max-dependences", cl::Hidden,
108 cl::desc("Maximum number of dependences collected by "
109 "loop-access analysis (default = 100)"),
110 cl::init(100));
111
112/// This enables versioning on the strides of symbolically striding memory
113/// accesses in code like the following.
114/// for (i = 0; i < N; ++i)
115/// A[i * Stride1] += B[i * Stride2] ...
116///
117/// Will be roughly translated to
118/// if (Stride1 == 1 && Stride2 == 1) {
119/// for (i = 0; i < N; i+=4)
120/// A[i:i+3] += ...
121/// } else
122/// ...
124 "enable-mem-access-versioning", cl::init(true), cl::Hidden,
125 cl::desc("Enable symbolic stride memory access versioning"));
126
127/// Enable store-to-load forwarding conflict detection. This option can
128/// be disabled for correctness testing.
130 "store-to-load-forwarding-conflict-detection", cl::Hidden,
131 cl::desc("Enable conflict detection in loop-access analysis"),
132 cl::init(true));
133
135 "max-forked-scev-depth", cl::Hidden,
136 cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
137 cl::init(5));
138
140 "laa-speculate-unit-stride", cl::Hidden,
141 cl::desc("Speculate that non-constant strides are unit in LAA"),
142 cl::init(true));
143
145 "hoist-runtime-checks", cl::Hidden,
146 cl::desc(
147 "Hoist inner loop runtime memory checks to outer loop if possible"),
150
152 return ::VectorizationInterleave.getNumOccurrences() > 0;
153}
154
156 const DenseMap<Value *, const SCEV *> &PtrToStride,
157 Value *Ptr) {
158 const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
159
160 // If there is an entry in the map return the SCEV of the pointer with the
161 // symbolic stride replaced by one.
162 const SCEV *StrideSCEV = PtrToStride.lookup(Ptr);
163 if (!StrideSCEV)
164 // For a non-symbolic stride, just return the original expression.
165 return OrigSCEV;
166
167 // Note: This assert is both overly strong and overly weak. The actual
168 // invariant here is that StrideSCEV should be loop invariant. The only
169 // such invariant strides we happen to speculate right now are unknowns
170 // and thus this is a reasonable proxy of the actual invariant.
171 assert(isa<SCEVUnknown>(StrideSCEV) && "shouldn't be in map");
172
173 ScalarEvolution *SE = PSE.getSE();
174 const SCEV *CT = SE->getOne(StrideSCEV->getType());
175 PSE.addPredicate(*SE->getEqualPredicate(StrideSCEV, CT));
176 const SCEV *Expr = PSE.getSCEV(Ptr);
177
178 LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
179 << " by: " << *Expr << "\n");
180 return Expr;
181}
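//
// Illustrative example (editorial addition, not in the upstream source): for a
// loop whose only non-unit stride is a loop-invariant unknown %Stride, e.g.
//
//   for (i = 0; i < n; ++i)
//     A[i * Stride] = ...;        // SCEV for the pointer: {%A,+,(4 * %Stride)}
//
// replaceSymbolicStrideSCEV adds the predicate "%Stride == 1" to PSE and
// returns the re-folded expression, roughly {%A,+,4}, so later analysis can
// treat the access as unit-strided under that versioning condition.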
182
184 unsigned Index, const RuntimePointerChecking &RtCheck)
185 : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
186 AddressSpace(RtCheck.Pointers[Index]
187 .PointerValue->getType()
189 NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) {
190 Members.push_back(Index);
191}
192
193/// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
194/// return nullptr. \p A and \p B must have the same type.
195static const SCEV *addSCEVNoOverflow(const SCEV *A, const SCEV *B,
196 ScalarEvolution &SE,
197 const Instruction *CtxI) {
198 if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/false, A, B, CtxI))
199 return nullptr;
200 return SE.getAddExpr(A, B);
201}
202
203/// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
204/// return nullptr. \p A and \p B must have the same type.
205static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
206 ScalarEvolution &SE,
207 const Instruction *CtxI) {
208 if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/false, A, B, CtxI))
209 return nullptr;
210 return SE.getMulExpr(A, B);
211}
212
213/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
214/// \p MaxBTC is guaranteed inbounds of the accessed object.
215static bool
217 const SCEV *MaxBTC, const SCEV *EltSize,
218 ScalarEvolution &SE, const DataLayout &DL,
220 auto *PointerBase = SE.getPointerBase(AR->getStart());
221 auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
222 if (!StartPtr)
223 return false;
224 const Loop *L = AR->getLoop();
225 bool CheckForNonNull, CheckForFreed;
226 Value *StartPtrV = StartPtr->getValue();
227 uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes(
228 DL, CheckForNonNull, CheckForFreed);
229
230 if (DerefBytes && (CheckForNonNull || CheckForFreed))
231 return false;
232
233 const SCEV *Step = AR->getStepRecurrence(SE);
234 Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
235 const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
236
237 // Context which dominates the entire loop.
238 auto *CtxI = L->getLoopPredecessor()->getTerminator();
239 // Check if we have a suitable dereferencable assumption we can use.
240 if (!StartPtrV->canBeFreed()) {
242 StartPtrV, {Attribute::Dereferenceable}, *AC, CtxI, DT);
243 if (DerefRK) {
244 DerefBytesSCEV = SE.getUMaxExpr(
245 DerefBytesSCEV, SE.getConstant(WiderTy, DerefRK.ArgValue));
246 }
247 }
248
249 if (DerefBytesSCEV->isZero())
250 return false;
251
252 bool IsKnownNonNegative = SE.isKnownNonNegative(Step);
253 if (!IsKnownNonNegative && !SE.isKnownNegative(Step))
254 return false;
255
256 Step = SE.getNoopOrSignExtend(Step, WiderTy);
257 MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);
258
259 // For the computations below, make sure they don't unsigned wrap.
260 if (!SE.isKnownPredicate(CmpInst::ICMP_UGE, AR->getStart(), StartPtr))
261 return false;
262 const SCEV *StartOffset = SE.getNoopOrZeroExtend(
263 SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);
264
265 const SCEV *OffsetAtLastIter =
266 mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE, CtxI);
267 if (!OffsetAtLastIter)
268 return false;
269
270 const SCEV *OffsetEndBytes = addSCEVNoOverflow(
271 OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE, CtxI);
272 if (!OffsetEndBytes)
273 return false;
274
275 if (IsKnownNonNegative) {
276 // For positive steps, check if
277 // (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes,
278 // while making sure none of the computations unsigned wrap themselves.
279 const SCEV *EndBytes =
280 addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE, CtxI);
281 if (!EndBytes)
282 return false;
283 return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
284 }
285
286 // For negative steps check if
287 // * StartOffset >= (MaxBTC * Step + EltSize)
288 // * StartOffset <= DerefBytes.
289 assert(SE.isKnownNegative(Step) && "must be known negative");
290 return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
291 SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV);
292}
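//
// Worked example (editorial, assumed values): with AR = {%p,+,4}, MaxBTC = 7,
// EltSize = 4, StartPtr == %p and %p known dereferenceable for 64 bytes,
// StartOffset = 0, OffsetAtLastIter = 7 * 4 = 28 and OffsetEndBytes = 28 + 4 =
// 32, so EndBytes = 32 <= DerefBytes = 64 and the access at MaxBTC provably
// stays inside the object, i.e. it cannot unsigned-wrap.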
293
294std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
295 const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
296 const SCEV *MaxBTC, ScalarEvolution *SE,
297 DenseMap<std::pair<const SCEV *, Type *>,
298 std::pair<const SCEV *, const SCEV *>> *PointerBounds,
300 std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
301 if (PointerBounds) {
302 auto [Iter, Ins] = PointerBounds->insert(
303 {{PtrExpr, AccessTy},
304 {SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
305 if (!Ins)
306 return Iter->second;
307 PtrBoundsPair = &Iter->second;
308 }
309
310 const SCEV *ScStart;
311 const SCEV *ScEnd;
312
313 auto &DL = Lp->getHeader()->getDataLayout();
314 Type *IdxTy = DL.getIndexType(PtrExpr->getType());
315 const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
316 if (SE->isLoopInvariant(PtrExpr, Lp)) {
317 ScStart = ScEnd = PtrExpr;
318 } else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
319 ScStart = AR->getStart();
320 if (!isa<SCEVCouldNotCompute>(BTC))
321 // Evaluating AR at an exact BTC is safe: LAA separately checks that
322 // accesses cannot wrap in the loop. If evaluating AR at BTC wraps, then
323 // the loop either triggers UB when executing a memory access with a
324 // poison pointer or the wrapping/poisoned pointer is not used.
325 ScEnd = AR->evaluateAtIteration(BTC, *SE);
326 else {
327 // Evaluating AR at MaxBTC may wrap and create an expression that is less
328 // than the start of the AddRec due to wrapping (for example consider
329 // MaxBTC = -2). If that's the case, set ScEnd to -(EltSize + 1). ScEnd
330 // will get incremented by EltSize before returning, so this effectively
331 // sets ScEnd to the maximum unsigned value for the type. Note that LAA
332 // separately checks that accesses cannot wrap, so unsigned max
333 // represents an upper bound.
334 if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL,
335 DT, AC)) {
336 ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
337 } else {
338 ScEnd = SE->getAddExpr(
339 SE->getNegativeSCEV(EltSizeSCEV),
340 SE->getSCEV(ConstantExpr::getIntToPtr(
341 ConstantInt::get(EltSizeSCEV->getType(), -1), AR->getType())));
342 }
343 }
344 const SCEV *Step = AR->getStepRecurrence(*SE);
345
346 // For expressions with negative step, the upper bound is ScStart and the
347 // lower bound is ScEnd.
348 if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
349 if (CStep->getValue()->isNegative())
350 std::swap(ScStart, ScEnd);
351 } else {
352 // Fallback case: the step is not constant, but we can still
353 // get the upper and lower bounds of the interval by using min/max
354 // expressions.
355 ScStart = SE->getUMinExpr(ScStart, ScEnd);
356 ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
357 }
358 } else
359 return {SE->getCouldNotCompute(), SE->getCouldNotCompute()};
360
361 assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant");
362 assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");
363
364 // Add the size of the pointed element to ScEnd.
365 ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
366
367 std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
368 if (PointerBounds)
369 *PtrBoundsPair = Res;
370 return Res;
371}
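//
// Illustrative result (editorial): for an i32 access through {%base,+,4} in a
// loop with backedge-taken count %n, this returns
//   ScStart = %base
//   ScEnd   = (%base + (4 * %n)) + 4
// i.e. a [Start, End) style range that already includes the size of the last
// element accessed.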
372
373/// Calculate Start and End points of memory access using
374/// getStartAndEndForAccess.
376 Type *AccessTy, bool WritePtr,
377 unsigned DepSetId, unsigned ASId,
379 bool NeedsFreeze) {
380 const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
381 const SCEV *BTC = PSE.getBackedgeTakenCount();
382 const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
383 Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
384 &DC.getPointerBounds(), DC.getDT(), DC.getAC());
385 assert(!isa<SCEVCouldNotCompute>(ScStart) &&
386 !isa<SCEVCouldNotCompute>(ScEnd) &&
387 "must be able to compute both start and end expressions");
388 Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
389 NeedsFreeze);
390}
391
392bool RuntimePointerChecking::tryToCreateDiffCheck(
393 const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) {
394 // If either group contains multiple different pointers, bail out.
395 // TODO: Support multiple pointers by using the minimum or maximum pointer,
396 // depending on src & sink.
397 if (CGI.Members.size() != 1 || CGJ.Members.size() != 1)
398 return false;
399
400 const PointerInfo *Src = &Pointers[CGI.Members[0]];
401 const PointerInfo *Sink = &Pointers[CGJ.Members[0]];
402
403 // If either pointer is read and written, multiple checks may be needed. Bail
404 // out.
405 if (!DC.getOrderForAccess(Src->PointerValue, !Src->IsWritePtr).empty() ||
406 !DC.getOrderForAccess(Sink->PointerValue, !Sink->IsWritePtr).empty())
407 return false;
408
409 ArrayRef<unsigned> AccSrc =
410 DC.getOrderForAccess(Src->PointerValue, Src->IsWritePtr);
411 ArrayRef<unsigned> AccSink =
412 DC.getOrderForAccess(Sink->PointerValue, Sink->IsWritePtr);
413 // If either pointer is accessed multiple times, there may not be a clear
414 // src/sink relation. Bail out for now.
415 if (AccSrc.size() != 1 || AccSink.size() != 1)
416 return false;
417
418 // If the sink is accessed before src, swap src/sink.
419 if (AccSink[0] < AccSrc[0])
420 std::swap(Src, Sink);
421
422 const SCEVConstant *Step;
423 const SCEV *SrcStart;
424 const SCEV *SinkStart;
425 const Loop *InnerLoop = DC.getInnermostLoop();
426 if (!match(Src->Expr,
428 m_SpecificLoop(InnerLoop))) ||
429 !match(Sink->Expr,
431 m_SpecificLoop(InnerLoop))))
432 return false;
433
435 DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr);
437 DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr);
438 Type *SrcTy = getLoadStoreType(SrcInsts[0]);
439 Type *DstTy = getLoadStoreType(SinkInsts[0]);
440 if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy))
441 return false;
442
443 const DataLayout &DL = InnerLoop->getHeader()->getDataLayout();
444 unsigned AllocSize =
445 std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));
446
447 // Only constant steps matching the AllocSize are supported at the
448 // moment. This simplifies the difference computation. Can be extended in the
449 // future.
450 if (Step->getAPInt().abs() != AllocSize)
451 return false;
452
453 IntegerType *IntTy =
454 IntegerType::get(Src->PointerValue->getContext(),
455 DL.getPointerSizeInBits(CGI.AddressSpace));
456
457 // When counting down, the dependence distance needs to be swapped.
458 if (Step->getValue()->isNegative())
459 std::swap(SinkStart, SrcStart);
460
461 const SCEV *SinkStartInt = SE->getPtrToIntExpr(SinkStart, IntTy);
462 const SCEV *SrcStartInt = SE->getPtrToIntExpr(SrcStart, IntTy);
463 if (isa<SCEVCouldNotCompute>(SinkStartInt) ||
464 isa<SCEVCouldNotCompute>(SrcStartInt))
465 return false;
466
467 // If the start values for both Src and Sink also vary according to an outer
468 // loop, then it's probably better to avoid creating diff checks because
469 // they may not be hoisted. We should instead let llvm::addRuntimeChecks
470 // do the expanded full range overlap checks, which can be hoisted.
471 if (HoistRuntimeChecks && InnerLoop->getParentLoop() &&
472 isa<SCEVAddRecExpr>(SinkStartInt) && isa<SCEVAddRecExpr>(SrcStartInt)) {
473 auto *SrcStartAR = cast<SCEVAddRecExpr>(SrcStartInt);
474 auto *SinkStartAR = cast<SCEVAddRecExpr>(SinkStartInt);
475 const Loop *StartARLoop = SrcStartAR->getLoop();
476 if (StartARLoop == SinkStartAR->getLoop() &&
477 StartARLoop == InnerLoop->getParentLoop() &&
478 // If the diff check would already be loop invariant (due to the
479 // recurrences being the same), then we prefer to keep the diff checks
480 // because they are cheaper.
481 SrcStartAR->getStepRecurrence(*SE) !=
482 SinkStartAR->getStepRecurrence(*SE)) {
483 LLVM_DEBUG(dbgs() << "LAA: Not creating diff runtime check, since these "
484 "cannot be hoisted out of the outer loop\n");
485 return false;
486 }
487 }
488
489 LLVM_DEBUG(dbgs() << "LAA: Creating diff runtime check for:\n"
490 << "SrcStart: " << *SrcStartInt << '\n'
491 << "SinkStartInt: " << *SinkStartInt << '\n');
492 DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
493 Src->NeedsFreeze || Sink->NeedsFreeze);
494 return true;
495}
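//
// Sketch of the check recorded here (editorial; the IR is materialized later
// by the runtime-check expansion and its exact shape may differ): with
// AllocSize = 4 the DiffCheck conceptually expands to something like
//   %diff = sub i64 ptrtoint(%SinkStart), ptrtoint(%SrcStart)
//   %safe = icmp uge i64 %diff, 4 * VF
// which is cheaper than comparing the full (Start, End) ranges of both groups.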
496
497SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() {
499
500 for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
501 for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
504
505 if (needsChecking(CGI, CGJ)) {
506 CanUseDiffCheck = CanUseDiffCheck && tryToCreateDiffCheck(CGI, CGJ);
507 Checks.emplace_back(&CGI, &CGJ);
508 }
509 }
510 }
511 return Checks;
512}
513
514void RuntimePointerChecking::generateChecks(
515 MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
516 assert(Checks.empty() && "Checks is not empty");
517 groupChecks(DepCands, UseDependencies);
518 Checks = generateChecks();
519}
520
522 const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
523 for (const auto &I : M.Members)
524 for (const auto &J : N.Members)
525 if (needsChecking(I, J))
526 return true;
527 return false;
528}
529
530/// Compare \p I and \p J and return the minimum.
531/// Return nullptr in case we couldn't find an answer.
532static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
533 ScalarEvolution *SE) {
534 std::optional<APInt> Diff = SE->computeConstantDifference(J, I);
535 if (!Diff)
536 return nullptr;
537 return Diff->isNegative() ? J : I;
538}
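//
// Example (editorial): with I = %a and J = (%a + 16) the constant difference
// J - I is 16, which is non-negative, so I is returned as the minimum; if the
// difference does not fold to a constant, nullptr signals that no answer is
// known.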
539
541 unsigned Index, const RuntimePointerChecking &RtCheck) {
542 return addPointer(
543 Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
544 RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
545 RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);
546}
547
548bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
549 const SCEV *End, unsigned AS,
550 bool NeedsFreeze,
551 ScalarEvolution &SE) {
552 assert(AddressSpace == AS &&
553 "all pointers in a checking group must be in the same address space");
554
555 // Compare the starts and ends with the known minimum and maximum
556 // of this set. We need to know how we compare against the min/max
557 // of the set in order to be able to emit memchecks.
558 const SCEV *Min0 = getMinFromExprs(Start, Low, &SE);
559 if (!Min0)
560 return false;
561
562 const SCEV *Min1 = getMinFromExprs(End, High, &SE);
563 if (!Min1)
564 return false;
565
566 // Update the low bound expression if we've found a new min value.
567 if (Min0 == Start)
568 Low = Start;
569
570 // Update the high bound expression if we've found a new max value.
571 if (Min1 != End)
572 High = End;
573
574 Members.push_back(Index);
575 this->NeedsFreeze |= NeedsFreeze;
576 return true;
577}
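//
// Illustrative merge (editorial, assumed bounds): a group with Low = %a and
// High = %a + 40 absorbing a pointer with Start = %a + 16 and End = %a + 56
// keeps Low = %a, raises High to %a + 56 and appends the member index; if
// either comparison against Low/High does not fold to a constant difference,
// the merge is rejected and the caller starts a new group.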
578
579void RuntimePointerChecking::groupChecks(
580 MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
581 // We build the groups from dependency candidates equivalence classes
582 // because:
583 // - We know that pointers in the same equivalence class share
584 // the same underlying object and therefore there is a chance
585 // that we can compare pointers
586 // - We wouldn't be able to merge two pointers for which we need
587 // to emit a memcheck. The classes in DepCands are already
588 // conveniently built such that no two pointers in the same
589 // class need checking against each other.
590
591 // We use the following (greedy) algorithm to construct the groups
592 // For every pointer in the equivalence class:
593 // For each existing group:
594 // - if the difference between this pointer and the min/max bounds
595 // of the group is a constant, then make the pointer part of the
596 // group and update the min/max bounds of that group as required.
597
598 CheckingGroups.clear();
599
600 // If we need to check two pointers to the same underlying object
601 // with a non-constant difference, we shouldn't perform any pointer
602 // grouping with those pointers. This is because we can easily get
603 // into cases where the resulting check would return false, even when
604 // the accesses are safe.
605 //
606 // The following example shows this:
607 // for (i = 0; i < 1000; ++i)
608 // a[5000 + i * m] = a[i] + a[i + 9000]
609 //
610 // Here grouping gives a check of (5000, 5000 + 1000 * m) against
611 // (0, 10000) which is always false. However, if m is 1, there is no
612 // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
613 // us to perform an accurate check in this case.
614 //
615 // In the above case, we have a non-constant distance and an Unknown
616 // dependence between accesses to the same underlying object, and could retry
617 // with runtime checks. Therefore UseDependencies is false. In this case we
618 // will use the fallback path and create separate checking groups for all
619 // pointers.
620
621 // If we don't have the dependency partitions, construct a new
622 // checking pointer group for each pointer. This is also required
623 // for correctness, because in this case we can have checking between
624 // pointers to the same underlying object.
625 if (!UseDependencies) {
626 for (unsigned I = 0; I < Pointers.size(); ++I)
627 CheckingGroups.emplace_back(I, *this);
628 return;
629 }
630
631 unsigned TotalComparisons = 0;
632
634 for (unsigned Index = 0; Index < Pointers.size(); ++Index)
635 PositionMap[Pointers[Index].PointerValue].push_back(Index);
636
637 // We need to keep track of what pointers we've already seen so we
638 // don't process them twice.
640
641 // Go through all equivalence classes, get the "pointer check groups"
642 // and add them to the overall solution. We use the order in which accesses
643 // appear in 'Pointers' to enforce determinism.
644 for (unsigned I = 0; I < Pointers.size(); ++I) {
645 // We've seen this pointer before, and therefore already processed
646 // its equivalence class.
647 if (Seen.contains(I))
648 continue;
649
651 Pointers[I].IsWritePtr);
652
654
655 // Because DepCands is constructed by visiting accesses in the order in
656 // which they appear in alias sets (which is deterministic) and the
657 // iteration order within an equivalence class member is only dependent on
658 // the order in which unions and insertions are performed on the
659 // equivalence class, the iteration order is deterministic.
660 for (auto M : DepCands.members(Access)) {
661 auto PointerI = PositionMap.find(M.getPointer());
662 // If we can't find the pointer in PositionMap that means we can't
663 // generate a memcheck for it.
664 if (PointerI == PositionMap.end())
665 continue;
666 for (unsigned Pointer : PointerI->second) {
667 bool Merged = false;
668 // Mark this pointer as seen.
669 Seen.insert(Pointer);
670
671 // Go through all the existing sets and see if we can find one
672 // which can include this pointer.
673 for (RuntimeCheckingPtrGroup &Group : Groups) {
674 // Don't perform more than a certain amount of comparisons.
675 // This should limit the cost of grouping the pointers to something
676 // reasonable. If we do end up hitting this threshold, the algorithm
677 // will create separate groups for all remaining pointers.
678 if (TotalComparisons > MemoryCheckMergeThreshold)
679 break;
680
681 TotalComparisons++;
682
683 if (Group.addPointer(Pointer, *this)) {
684 Merged = true;
685 break;
686 }
687 }
688
689 if (!Merged)
690 // We couldn't add this pointer to any existing set or the threshold
691 // for the number of comparisons has been reached. Create a new group
692 // to hold the current pointer.
693 Groups.emplace_back(Pointer, *this);
694 }
695 }
696
697 // We've computed the grouped checks for this partition.
698 // Save the results and continue with the next one.
700 }
701}
702
704 const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
705 unsigned PtrIdx2) {
706 return (PtrToPartition[PtrIdx1] != -1 &&
707 PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
708}
709
710bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
711 const PointerInfo &PointerI = Pointers[I];
712 const PointerInfo &PointerJ = Pointers[J];
713
714 // No need to check if two readonly pointers intersect.
715 if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
716 return false;
717
718 // Only need to check pointers between two different dependency sets.
719 if (PointerI.DependencySetId == PointerJ.DependencySetId)
720 return false;
721
722 // Only need to check pointers in the same alias set.
723 return PointerI.AliasSetId == PointerJ.AliasSetId;
724}
725
726/// Assign each RuntimeCheckingPtrGroup pointer an index for stable UTC output.
730 for (const auto &[Idx, CG] : enumerate(CheckingGroups))
731 PtrIndices[&CG] = Idx;
732 return PtrIndices;
733}
734
737 unsigned Depth) const {
738 unsigned N = 0;
739 auto PtrIndices = getPtrToIdxMap(CheckingGroups);
740 for (const auto &[Check1, Check2] : Checks) {
741 const auto &First = Check1->Members, &Second = Check2->Members;
742 OS.indent(Depth) << "Check " << N++ << ":\n";
743 OS.indent(Depth + 2) << "Comparing group GRP" << PtrIndices.at(Check1)
744 << ":\n";
745 for (unsigned K : First)
746 OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n";
747 OS.indent(Depth + 2) << "Against group GRP" << PtrIndices.at(Check2)
748 << ":\n";
749 for (unsigned K : Second)
750 OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n";
751 }
752}
753
755
756 OS.indent(Depth) << "Run-time memory checks:\n";
757 printChecks(OS, Checks, Depth);
758
759 OS.indent(Depth) << "Grouped accesses:\n";
760 auto PtrIndices = getPtrToIdxMap(CheckingGroups);
761 for (const auto &CG : CheckingGroups) {
762 OS.indent(Depth + 2) << "Group GRP" << PtrIndices.at(&CG) << ":\n";
763 OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
764 << ")\n";
765 for (unsigned Member : CG.Members) {
766 OS.indent(Depth + 6) << "Member: " << *Pointers[Member].Expr << "\n";
767 }
768 }
769}
770
771namespace {
772
773/// Analyses memory accesses in a loop.
774///
775/// Checks whether run time pointer checks are needed and builds sets for data
776/// dependence checking.
777class AccessAnalysis {
778public:
779 /// Read or write access location.
780 typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
781 typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
782
783 AccessAnalysis(const Loop *TheLoop, AAResults *AA, const LoopInfo *LI,
786 SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
787 : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE),
788 LoopAliasScopes(LoopAliasScopes) {
789 // We're analyzing dependences across loop iterations.
790 BAA.enableCrossIterationMode();
791 }
792
793 /// Register a load and whether it is only read from.
794 void addLoad(const MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
795 Value *Ptr = const_cast<Value *>(Loc.Ptr);
796 AST.add(adjustLoc(Loc));
797 Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
798 if (IsReadOnly)
799 ReadOnlyPtr.insert(Ptr);
800 }
801
802 /// Register a store.
803 void addStore(const MemoryLocation &Loc, Type *AccessTy) {
804 Value *Ptr = const_cast<Value *>(Loc.Ptr);
805 AST.add(adjustLoc(Loc));
806 Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
807 }
808
809 /// Check if we can emit a run-time no-alias check for \p Access.
810 ///
811 /// Returns true if we can emit a run-time no alias check for \p Access.
812 /// If we can check this access, this also adds it to a dependence set and
813 /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
814 /// we will attempt to use additional run-time checks in order to get
815 /// the bounds of the pointer.
816 bool createCheckForAccess(RuntimePointerChecking &RtCheck,
817 MemAccessInfo Access, Type *AccessTy,
818 const DenseMap<Value *, const SCEV *> &Strides,
820 Loop *TheLoop, unsigned &RunningDepId,
821 unsigned ASId, bool Assume);
822
823 /// Check whether we can check the pointers at runtime for
824 /// non-intersection.
825 ///
826 /// Returns true if we need no check or if we do and we can generate them
827 /// (i.e. the pointers have computable bounds). A return value of false means
828 /// we couldn't analyze and generate runtime checks for all pointers in the
829 /// loop, but if \p AllowPartial is set then we will have checks for those
830 /// pointers we could analyze.
831 bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, Loop *TheLoop,
832 const DenseMap<Value *, const SCEV *> &Strides,
833 Value *&UncomputablePtr, bool AllowPartial);
834
835 /// Goes over all memory accesses, checks whether a RT check is needed
836 /// and builds sets of dependent accesses.
837 void buildDependenceSets() {
838 processMemAccesses();
839 }
840
841 /// Initial processing of memory accesses determined that we need to
842 /// perform dependency checking.
843 ///
844 /// Note that this can later be cleared if we retry memcheck analysis without
845 /// dependency checking (i.e. ShouldRetryWithRuntimeChecks).
846 bool isDependencyCheckNeeded() const { return !CheckDeps.empty(); }
847
848 /// We decided that no dependence analysis would be used. Reset the state.
849 void resetDepChecks(MemoryDepChecker &DepChecker) {
850 CheckDeps.clear();
851 DepChecker.clearDependences();
852 }
853
854 const MemAccessInfoList &getDependenciesToCheck() const { return CheckDeps; }
855
856private:
858
859 /// Adjust the MemoryLocation so that it represents accesses to this
860 /// location across all iterations, rather than a single one.
861 MemoryLocation adjustLoc(MemoryLocation Loc) const {
862 // The accessed location varies within the loop, but remains within the
863 // underlying object.
865 Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope);
866 Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias);
867 return Loc;
868 }
869
870 /// Drop alias scopes that are only valid within a single loop iteration.
871 MDNode *adjustAliasScopeList(MDNode *ScopeList) const {
872 if (!ScopeList)
873 return nullptr;
874
875 // For the sake of simplicity, drop the whole scope list if any scope is
876 // iteration-local.
877 if (any_of(ScopeList->operands(), [&](Metadata *Scope) {
878 return LoopAliasScopes.contains(cast<MDNode>(Scope));
879 }))
880 return nullptr;
881
882 return ScopeList;
883 }
884
885 /// Go over all memory access and check whether runtime pointer checks
886 /// are needed and build sets of dependency check candidates.
887 void processMemAccesses();
888
889 /// Map of all accesses. Values are the types used to access memory pointed to
890 /// by the pointer.
891 PtrAccessMap Accesses;
892
893 /// The loop being checked.
894 const Loop *TheLoop;
895
896 /// List of accesses that need a further dependence check.
897 MemAccessInfoList CheckDeps;
898
899 /// Set of pointers that are read only.
900 SmallPtrSet<Value*, 16> ReadOnlyPtr;
901
902 /// Batched alias analysis results.
903 BatchAAResults BAA;
904
905 /// An alias set tracker to partition the access set by underlying object and
906 /// intrinsic property (such as TBAA metadata).
907 AliasSetTracker AST;
908
909 /// The LoopInfo of the loop being checked.
910 const LoopInfo *LI;
911
912 /// Sets of potentially dependent accesses - members of one set share an
913 /// underlying pointer. The set "CheckDeps" identifies which sets really need a
914 /// dependence check.
916
917 /// Initial processing of memory accesses determined that we may need
918 /// to add memchecks. Perform the analysis to determine the necessary checks.
919 ///
920 /// Note that this is different from isDependencyCheckNeeded. When we retry
921 /// memcheck analysis without dependency checking
922 /// (i.e. ShouldRetryWithRuntimeChecks), isDependencyCheckNeeded is
923 /// cleared while this remains set if we have potentially dependent accesses.
924 bool IsRTCheckAnalysisNeeded = false;
925
926 /// The SCEV predicate containing all the SCEV-related assumptions.
928
930
931 /// Alias scopes that are declared inside the loop, and as such not valid
932 /// across iterations.
933 SmallPtrSetImpl<MDNode *> &LoopAliasScopes;
934};
935
936} // end anonymous namespace
937
938/// Try to compute a constant stride for \p AR. Used by getPtrStride and
939/// isNoWrap.
940static std::optional<int64_t>
941getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy,
943 if (isa<ScalableVectorType>(AccessTy)) {
944 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
945 << "\n");
946 return std::nullopt;
947 }
948
949 // The access function must stride over the innermost loop.
950 if (Lp != AR->getLoop()) {
951 LLVM_DEBUG({
952 dbgs() << "LAA: Bad stride - Not striding over innermost loop ";
953 if (Ptr)
954 dbgs() << *Ptr << " ";
955
956 dbgs() << "SCEV: " << *AR << "\n";
957 });
958 return std::nullopt;
959 }
960
961 // Check the step is constant.
962 const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
963
964 // Calculate the pointer stride and check if it is constant.
965 const APInt *APStepVal;
966 if (!match(Step, m_scev_APInt(APStepVal))) {
967 LLVM_DEBUG({
968 dbgs() << "LAA: Bad stride - Not a constant strided ";
969 if (Ptr)
970 dbgs() << *Ptr << " ";
971 dbgs() << "SCEV: " << *AR << "\n";
972 });
973 return std::nullopt;
974 }
975
976 const auto &DL = Lp->getHeader()->getDataLayout();
977 TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
978 int64_t Size = AllocSize.getFixedValue();
979
980 // Huge step value - give up.
981 std::optional<int64_t> StepVal = APStepVal->trySExtValue();
982 if (!StepVal)
983 return std::nullopt;
984
985 // Strided access.
986 return *StepVal % Size ? std::nullopt : std::make_optional(*StepVal / Size);
987}
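//
// Example (editorial): for an i32 access (alloc size 4) through {%p,+,8} in
// the queried loop the returned stride is 8 / 4 = 2; a step of 6 is not a
// multiple of 4 and yields std::nullopt, as do non-constant steps and AddRecs
// of an outer loop.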
988
989/// Check whether \p AR is a non-wrapping AddRec. If \p Ptr is not nullptr, use
990/// information from the IR pointer value to determine no-wrap.
992 Value *Ptr, Type *AccessTy, const Loop *L, bool Assume,
993 std::optional<int64_t> Stride = std::nullopt) {
994 // FIXME: This should probably only return true for NUW.
996 return true;
997
999 return true;
1000
1001 // An nusw getelementptr that is an AddRec cannot wrap. If it would wrap,
1002 // the distance between the previously accessed location and the wrapped
1003 // location will be larger than half the pointer index type space. In that
1004 // case, the GEP would be poison and any memory access dependent on it would
1005 // be immediate UB when executed.
1006 if (auto *GEP = dyn_cast_if_present<GetElementPtrInst>(Ptr);
1007 GEP && GEP->hasNoUnsignedSignedWrap())
1008 return true;
1009
1010 if (!Stride)
1011 Stride = getStrideFromAddRec(AR, L, AccessTy, Ptr, PSE);
1012 if (Stride) {
1013 // If the null pointer is undefined, then an access sequence which would
1014 // otherwise access it can be assumed not to unsigned wrap. Note that this
1015 // assumes the object in memory is aligned to the natural alignment.
1016 unsigned AddrSpace = AR->getType()->getPointerAddressSpace();
1017 if (!NullPointerIsDefined(L->getHeader()->getParent(), AddrSpace) &&
1018 (Stride == 1 || Stride == -1))
1019 return true;
1020 }
1021
1022 if (Ptr && Assume) {
1024 LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
1025 << "LAA: Pointer: " << *Ptr << "\n"
1026 << "LAA: SCEV: " << *AR << "\n"
1027 << "LAA: Added an overflow assumption\n");
1028 return true;
1029 }
1030
1031 return false;
1032}
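//
// Illustrative consequence of the nusw reasoning above (editorial): for
//   %gep = getelementptr nusw i32, ptr %p, i64 %iv
// a wrapping %gep would be poison, and the load or store that uses it would
// be immediate UB, so the AddRec can be treated as non-wrapping without
// adding a runtime predicate.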
1033
1034static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
1035 function_ref<void(Value *)> AddPointer) {
1037 SmallVector<Value *> WorkList;
1038 WorkList.push_back(StartPtr);
1039
1040 while (!WorkList.empty()) {
1041 Value *Ptr = WorkList.pop_back_val();
1042 if (!Visited.insert(Ptr).second)
1043 continue;
1044 auto *PN = dyn_cast<PHINode>(Ptr);
1045 // SCEV does not look through non-header PHIs inside the loop. Such phis
1046 // can be analyzed by adding separate accesses for each incoming pointer
1047 // value.
1048 if (PN && InnermostLoop.contains(PN->getParent()) &&
1049 PN->getParent() != InnermostLoop.getHeader()) {
1050 llvm::append_range(WorkList, PN->incoming_values());
1051 } else
1052 AddPointer(Ptr);
1053 }
1054}
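//
// Example of the non-header phi case handled above (editorial):
//   loop.body:
//     %p = phi ptr [ %a.gep, %then ], [ %b.gep, %else ]
//     %v = load i32, ptr %p
// SCEV does not look through %p, so visitPointers registers %a.gep and %b.gep
// as two separate pointer accesses instead.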
1055
1056// Walk back through the IR for a pointer, looking for a select like the
1057// following:
1058//
1059// %offset = select i1 %cmp, i64 %a, i64 %b
1060// %addr = getelementptr double, double* %base, i64 %offset
1061// %ld = load double, double* %addr, align 8
1062//
1063// We won't be able to form a single SCEVAddRecExpr from this since the
1064// address for each loop iteration depends on %cmp. We could potentially
1065// produce multiple valid SCEVAddRecExprs, though, and check all of them for
1066// memory safety/aliasing if needed.
1067//
1068// If we encounter some IR we don't yet handle, or something obviously fine
1069// like a constant, then we just add the SCEV for that term to the list passed
1070// in by the caller. If we have a node that may potentially yield a valid
1071// SCEVAddRecExpr then we decompose it into parts and build the SCEV terms
1072// ourselves before adding to the list.
1074 ScalarEvolution *SE, const Loop *L, Value *Ptr,
1076 unsigned Depth) {
1077 // If our Value is a SCEVAddRecExpr, loop invariant, not an instruction, or
1078 // we've exceeded our limit on recursion, just return whatever we have
1079 // regardless of whether it can be used for a forked pointer or not, along
1080 // with an indication of whether it might be a poison or undef value.
1081 const SCEV *Scev = SE->getSCEV(Ptr);
1082 if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) ||
1083 !isa<Instruction>(Ptr) || Depth == 0) {
1084 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1085 return;
1086 }
1087
1088 Depth--;
1089
1090 auto UndefPoisonCheck = [](PointerIntPair<const SCEV *, 1, bool> S) {
1091 return get<1>(S);
1092 };
1093
1094 auto GetBinOpExpr = [&SE](unsigned Opcode, const SCEV *L, const SCEV *R) {
1095 switch (Opcode) {
1096 case Instruction::Add:
1097 return SE->getAddExpr(L, R);
1098 case Instruction::Sub:
1099 return SE->getMinusSCEV(L, R);
1100 default:
1101 llvm_unreachable("Unexpected binary operator when walking ForkedPtrs");
1102 }
1103 };
1104
1105 Instruction *I = cast<Instruction>(Ptr);
1106 unsigned Opcode = I->getOpcode();
1107 switch (Opcode) {
1108 case Instruction::GetElementPtr: {
1109 auto *GEP = cast<GetElementPtrInst>(I);
1110 Type *SourceTy = GEP->getSourceElementType();
1111 // We only handle base + single offset GEPs here for now.
1112 // Not dealing with preexisting gathers yet, so no vectors.
1113 if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {
1114 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP));
1115 break;
1116 }
1119 findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth);
1120 findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth);
1121
1122 // See if we need to freeze our fork...
1123 bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) ||
1124 any_of(OffsetScevs, UndefPoisonCheck);
1125
1126 // Check that we only have a single fork, on either the base or the offset.
1127 // Copy the SCEV across for the one without a fork in order to generate
1128 // the full SCEV for both sides of the GEP.
1129 if (OffsetScevs.size() == 2 && BaseScevs.size() == 1)
1130 BaseScevs.push_back(BaseScevs[0]);
1131 else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)
1132 OffsetScevs.push_back(OffsetScevs[0]);
1133 else {
1134 ScevList.emplace_back(Scev, NeedsFreeze);
1135 break;
1136 }
1137
1138 Type *IntPtrTy = SE->getEffectiveSCEVType(GEP->getPointerOperandType());
1139
1140 // Find the size of the type being pointed to. We only have a single
1141 // index term (guarded above) so we don't need to index into arrays or
1142 // structures, just get the size of the scalar value.
1143 const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy);
1144
1145 for (auto [B, O] : zip(BaseScevs, OffsetScevs)) {
1146 const SCEV *Base = get<0>(B);
1147 const SCEV *Offset = get<0>(O);
1148
1149 // Scale up the offsets by the size of the type, then add to the bases.
1150 const SCEV *Scaled =
1151 SE->getMulExpr(Size, SE->getTruncateOrSignExtend(Offset, IntPtrTy));
1152 ScevList.emplace_back(SE->getAddExpr(Base, Scaled), NeedsFreeze);
1153 }
1154 break;
1155 }
1156 case Instruction::Select: {
1158 // A select means we've found a forked pointer, but we currently only
1159 // support a single select per pointer so if there's another behind this
1160 // then we just bail out and return the generic SCEV.
1161 findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
1162 findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth);
1163 if (ChildScevs.size() == 2)
1164 append_range(ScevList, ChildScevs);
1165 else
1166 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1167 break;
1168 }
1169 case Instruction::PHI: {
1171 // A phi means we've found a forked pointer, but we currently only
1172 // support a single phi per pointer so if there's another behind this
1173 // then we just bail out and return the generic SCEV.
1174 if (I->getNumOperands() == 2) {
1175 findForkedSCEVs(SE, L, I->getOperand(0), ChildScevs, Depth);
1176 findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
1177 }
1178 if (ChildScevs.size() == 2)
1179 append_range(ScevList, ChildScevs);
1180 else
1181 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1182 break;
1183 }
1184 case Instruction::Add:
1185 case Instruction::Sub: {
1188 findForkedSCEVs(SE, L, I->getOperand(0), LScevs, Depth);
1189 findForkedSCEVs(SE, L, I->getOperand(1), RScevs, Depth);
1190
1191 // See if we need to freeze our fork...
1192 bool NeedsFreeze =
1193 any_of(LScevs, UndefPoisonCheck) || any_of(RScevs, UndefPoisonCheck);
1194
1195 // Check that we only have a single fork, on either the left or right side.
1196 // Copy the SCEV across for the one without a fork in order to generate
1197 // the full SCEV for both sides of the BinOp.
1198 if (LScevs.size() == 2 && RScevs.size() == 1)
1199 RScevs.push_back(RScevs[0]);
1200 else if (RScevs.size() == 2 && LScevs.size() == 1)
1201 LScevs.push_back(LScevs[0]);
1202 else {
1203 ScevList.emplace_back(Scev, NeedsFreeze);
1204 break;
1205 }
1206
1207 for (auto [L, R] : zip(LScevs, RScevs))
1208 ScevList.emplace_back(GetBinOpExpr(Opcode, get<0>(L), get<0>(R)),
1209 NeedsFreeze);
1210 break;
1211 }
1212 default:
1213 // Just return the current SCEV if we haven't handled the instruction yet.
1214 LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
1215 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1216 break;
1217 }
1218}
1219
1222 const DenseMap<Value *, const SCEV *> &StridesMap, Value *Ptr,
1223 const Loop *L) {
1224 ScalarEvolution *SE = PSE.getSE();
1225 assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
1227 findForkedSCEVs(SE, L, Ptr, Scevs, MaxForkedSCEVDepth);
1228
1229 // For now, we will only accept a forked pointer with two possible SCEVs
1230 // that are either SCEVAddRecExprs or loop invariant.
1231 if (Scevs.size() == 2 &&
1232 (isa<SCEVAddRecExpr>(get<0>(Scevs[0])) ||
1233 SE->isLoopInvariant(get<0>(Scevs[0]), L)) &&
1234 (isa<SCEVAddRecExpr>(get<0>(Scevs[1])) ||
1235 SE->isLoopInvariant(get<0>(Scevs[1]), L))) {
1236 LLVM_DEBUG(dbgs() << "LAA: Found forked pointer: " << *Ptr << "\n");
1237 LLVM_DEBUG(dbgs() << "\t(1) " << *get<0>(Scevs[0]) << "\n");
1238 LLVM_DEBUG(dbgs() << "\t(2) " << *get<0>(Scevs[1]) << "\n");
1239 return Scevs;
1240 }
1241
1242 return {{replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false}};
1243}
1244
1245bool AccessAnalysis::createCheckForAccess(
1246 RuntimePointerChecking &RtCheck, MemAccessInfo Access, Type *AccessTy,
1247 const DenseMap<Value *, const SCEV *> &StridesMap,
1248 DenseMap<Value *, unsigned> &DepSetId, Loop *TheLoop,
1249 unsigned &RunningDepId, unsigned ASId, bool Assume) {
1250 Value *Ptr = Access.getPointer();
1251
1253 findForkedPointer(PSE, StridesMap, Ptr, TheLoop);
1254 assert(!TranslatedPtrs.empty() && "must have some translated pointers");
1255
1256 /// Check whether all pointers can participate in a runtime bounds check. They
1257 /// must either be invariant or AddRecs. If ShouldCheckWrap is true, they also
1258 /// must not wrap.
1259 for (auto &P : TranslatedPtrs) {
1260 // The bounds for a loop-invariant pointer are trivial.
1261 if (PSE.getSE()->isLoopInvariant(P.getPointer(), TheLoop))
1262 continue;
1263
1264 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(P.getPointer());
1265 if (!AR && Assume)
1266 AR = PSE.getAsAddRec(Ptr);
1267 if (!AR || !AR->isAffine())
1268 return false;
1269
1270 // If there's only one option for Ptr, look it up after bounds and wrap
1271 // checking, because assumptions might have been added to PSE.
1272 if (TranslatedPtrs.size() == 1) {
1273 AR =
1274 cast<SCEVAddRecExpr>(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr));
1275 P.setPointer(AR);
1276 }
1277
1278 // When we run after a failing dependency check we have to make sure
1279 // we don't have wrapping pointers.
1280 if (!isNoWrap(PSE, AR, TranslatedPtrs.size() == 1 ? Ptr : nullptr, AccessTy,
1281 TheLoop, Assume)) {
1282 return false;
1283 }
1284 }
1285
1286 for (auto [PtrExpr, NeedsFreeze] : TranslatedPtrs) {
1287 // The id of the dependence set.
1288 unsigned DepId;
1289
1290 if (isDependencyCheckNeeded()) {
1291 Value *Leader = DepCands.getLeaderValue(Access).getPointer();
1292 unsigned &LeaderId = DepSetId[Leader];
1293 if (!LeaderId)
1294 LeaderId = RunningDepId++;
1295 DepId = LeaderId;
1296 } else
1297 // Each access has its own dependence set.
1298 DepId = RunningDepId++;
1299
1300 bool IsWrite = Access.getInt();
1301 RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
1302 NeedsFreeze);
1303 LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
1304 }
1305
1306 return true;
1307}
1308
1309bool AccessAnalysis::canCheckPtrAtRT(
1310 RuntimePointerChecking &RtCheck, Loop *TheLoop,
1311 const DenseMap<Value *, const SCEV *> &StridesMap, Value *&UncomputablePtr,
1312 bool AllowPartial) {
1313 // Find pointers with computable bounds. We are going to use this information
1314 // to place a runtime bound check.
1315 bool CanDoRT = true;
1316
1317 bool MayNeedRTCheck = false;
1318 if (!IsRTCheckAnalysisNeeded) return true;
1319
1320 bool IsDepCheckNeeded = isDependencyCheckNeeded();
1321
1322 // We assign a consecutive id to accesses from different alias sets.
1323 // Accesses between different groups don't need to be checked.
1324 unsigned ASId = 0;
1325 for (const auto &AS : AST) {
1326 int NumReadPtrChecks = 0;
1327 int NumWritePtrChecks = 0;
1328 bool CanDoAliasSetRT = true;
1329 ++ASId;
1330 auto ASPointers = AS.getPointers();
1331
1332 // We assign consecutive ids to accesses from different dependence sets.
1333 // Accesses within the same set don't need a runtime check.
1334 unsigned RunningDepId = 1;
1336
1338
1339 // First, count how many write and read accesses are in the alias set. Also
1340 // collect MemAccessInfos for later.
1342 for (const Value *ConstPtr : ASPointers) {
1343 Value *Ptr = const_cast<Value *>(ConstPtr);
1344 bool IsWrite = Accesses.contains(MemAccessInfo(Ptr, true));
1345 if (IsWrite)
1346 ++NumWritePtrChecks;
1347 else
1348 ++NumReadPtrChecks;
1349 AccessInfos.emplace_back(Ptr, IsWrite);
1350 }
1351
1352 // We do not need runtime checks for this alias set, if there are no writes
1353 // or a single write and no reads.
1354 if (NumWritePtrChecks == 0 ||
1355 (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
1356 assert((ASPointers.size() <= 1 ||
1357 all_of(ASPointers,
1358 [this](const Value *Ptr) {
1359 MemAccessInfo AccessWrite(const_cast<Value *>(Ptr),
1360 true);
1361 return !DepCands.contains(AccessWrite);
1362 })) &&
1363 "Can only skip updating CanDoRT below, if all entries in AS "
1364 "are reads or there is at most 1 entry");
1365 continue;
1366 }
1367
1368 for (auto &Access : AccessInfos) {
1369 for (const auto &AccessTy : Accesses[Access]) {
1370 if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1371 DepSetId, TheLoop, RunningDepId, ASId,
1372 false)) {
1373 LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
1374 << *Access.getPointer() << '\n');
1375 Retries.emplace_back(Access, AccessTy);
1376 CanDoAliasSetRT = false;
1377 }
1378 }
1379 }
1380
1381 // Note that this function computes CanDoRT and MayNeedRTCheck
1382 // independently. For example CanDoRT=false, MayNeedRTCheck=false means that
1383 // we have a pointer for which we couldn't find the bounds but we don't
1384 // actually need to emit any checks so it does not matter.
1385 //
1386 // We need runtime checks for this alias set, if there are at least 2
1387 // dependence sets (in which case RunningDepId > 2) or if we need to re-try
1388 // any bound checks (because in that case the number of dependence sets is
1389 // incomplete).
1390 bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();
1391
1392 // We need to perform run-time alias checks, but some pointers had bounds
1393 // that couldn't be checked.
1394 if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
1395 // Reset the CanDoSetRt flag and retry all accesses that have failed.
1396 // We know that we need these checks, so we can now be more aggressive
1397 // and add further checks if required (overflow checks).
1398 CanDoAliasSetRT = true;
1399 for (const auto &[Access, AccessTy] : Retries) {
1400 if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1401 DepSetId, TheLoop, RunningDepId, ASId,
1402 /*Assume=*/true)) {
1403 CanDoAliasSetRT = false;
1404 UncomputablePtr = Access.getPointer();
1405 if (!AllowPartial)
1406 break;
1407 }
1408 }
1409 }
1410
1411 CanDoRT &= CanDoAliasSetRT;
1412 MayNeedRTCheck |= NeedsAliasSetRTCheck;
1413 ++ASId;
1414 }
1415
1416 // If the pointers that we would use for the bounds comparison have different
1417 // address spaces, assume the values aren't directly comparable, so we can't
1418 // use them for the runtime check. We also have to assume they could
1419 // overlap. In the future there should be metadata for whether address spaces
1420 // are disjoint.
1421 unsigned NumPointers = RtCheck.Pointers.size();
1422 for (unsigned i = 0; i < NumPointers; ++i) {
1423 for (unsigned j = i + 1; j < NumPointers; ++j) {
1424 // Only need to check pointers between two different dependency sets.
1425 if (RtCheck.Pointers[i].DependencySetId ==
1426 RtCheck.Pointers[j].DependencySetId)
1427 continue;
1428 // Only need to check pointers in the same alias set.
1429 if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
1430 continue;
1431
1432 Value *PtrI = RtCheck.Pointers[i].PointerValue;
1433 Value *PtrJ = RtCheck.Pointers[j].PointerValue;
1434
1435 unsigned ASi = PtrI->getType()->getPointerAddressSpace();
1436 unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
1437 if (ASi != ASj) {
1438 LLVM_DEBUG(
1439 dbgs() << "LAA: Runtime check would require comparison between"
1440 " different address spaces\n");
1441 return false;
1442 }
1443 }
1444 }
1445
1446 if (MayNeedRTCheck && (CanDoRT || AllowPartial))
1447 RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
1448
1449 LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
1450 << " pointer comparisons.\n");
1451
1452 // If we can do run-time checks, but there are no checks, no runtime checks
1453 // are needed. This can happen when all pointers point to the same underlying
1454 // object for example.
1455 RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck;
1456
1457 bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
1458 assert(CanDoRTIfNeeded == (CanDoRT || !MayNeedRTCheck) &&
1459 "CanDoRTIfNeeded depends on RtCheck.Need");
1460 if (!CanDoRTIfNeeded && !AllowPartial)
1461 RtCheck.reset();
1462 return CanDoRTIfNeeded;
1463}
1464
1465void AccessAnalysis::processMemAccesses() {
1466 // We process the set twice: first we process read-write pointers, last we
1467 // process read-only pointers. This allows us to skip dependence tests for
1468 // read-only pointers.
1469
1470 LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
1471 LLVM_DEBUG(dbgs() << " AST: "; AST.dump());
1472 LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
1473 LLVM_DEBUG({
1474 for (const auto &[A, _] : Accesses)
1475 dbgs() << "\t" << *A.getPointer() << " ("
1476 << (A.getInt()
1477 ? "write"
1478 : (ReadOnlyPtr.contains(A.getPointer()) ? "read-only"
1479 : "read"))
1480 << ")\n";
1481 });
1482
1483 // The AliasSetTracker has nicely partitioned our pointers by metadata
1484 // compatibility and potential for underlying-object overlap. As a result, we
1485 // only need to check for potential pointer dependencies within each alias
1486 // set.
1487 for (const auto &AS : AST) {
1488 // Note that both the alias-set tracker and the alias sets themselves use
1489 // ordered collections internally, so the iteration order here is
1490 // deterministic.
1491 auto ASPointers = AS.getPointers();
1492
1493 bool SetHasWrite = false;
1494
1495 // Map of (pointer to underlying objects, accessed address space) to last
1496 // access encountered.
1497 typedef DenseMap<std::pair<const Value *, unsigned>, MemAccessInfo>
1498 UnderlyingObjToAccessMap;
1499 UnderlyingObjToAccessMap ObjToLastAccess;
1500
1501 // Set of access to check after all writes have been processed.
1502 PtrAccessMap DeferredAccesses;
1503
1504 // Iterate over each alias set twice, once to process read/write pointers,
1505 // and then to process read-only pointers.
1506 for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
1507 bool UseDeferred = SetIteration > 0;
1508 PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
1509
1510 for (const Value *ConstPtr : ASPointers) {
1511 Value *Ptr = const_cast<Value *>(ConstPtr);
1512
1513 // For a single memory access in AliasSetTracker, Accesses may contain
1514 // both read and write, and they both need to be handled for CheckDeps.
1515 for (const auto &[AC, _] : S) {
1516 if (AC.getPointer() != Ptr)
1517 continue;
1518
1519 bool IsWrite = AC.getInt();
1520
1521 // If we're using the deferred access set, then it contains only
1522 // reads.
1523 bool IsReadOnlyPtr = ReadOnlyPtr.contains(Ptr) && !IsWrite;
1524 if (UseDeferred && !IsReadOnlyPtr)
1525 continue;
1526 // Otherwise, the pointer must be in the PtrAccessSet, either as a
1527 // read or a write.
1528 assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
1529 S.contains(MemAccessInfo(Ptr, false))) &&
1530 "Alias-set pointer not in the access set?");
1531
1532 MemAccessInfo Access(Ptr, IsWrite);
1533 DepCands.insert(Access);
1534
1535 // Memorize read-only pointers for later processing and skip them in
1536 // the first round (they need to be checked after we have seen all
1537 // write pointers). Note: we also mark pointers that are not
1538 // consecutive as "read-only" pointers (so that we check
1539 // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
1540 if (!UseDeferred && IsReadOnlyPtr) {
1541 // We only use the pointer keys, the types vector values don't
1542 // matter.
1543 DeferredAccesses.insert({Access, {}});
1544 continue;
1545 }
1546
1547 // If this is a write - check other reads and writes for conflicts. If
1548 // this is a read only check other writes for conflicts (but only if
1549 // there is no other write to the ptr - this is an optimization to
1550 // catch "a[i] = a[i] + " without having to do a dependence check).
1551 if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
1552 CheckDeps.push_back(Access);
1553 IsRTCheckAnalysisNeeded = true;
1554 }
1555
1556 if (IsWrite)
1557 SetHasWrite = true;
1558
1559 // Create sets of pointers connected by a shared alias set and
1560 // underlying object.
1561 SmallVector<const Value *, 16> &UOs = UnderlyingObjects[Ptr];
1562 UOs = {};
1563 ::getUnderlyingObjects(Ptr, UOs, LI);
1565 << "Underlying objects for pointer " << *Ptr << "\n");
1566 for (const Value *UnderlyingObj : UOs) {
1567 // nullptr never aliases; don't join sets for pointers that have "null"
1568 // in their UnderlyingObjects list.
1569 if (isa<ConstantPointerNull>(UnderlyingObj) &&
1570 !NullPointerIsDefined(
1571 TheLoop->getHeader()->getParent(),
1572 UnderlyingObj->getType()->getPointerAddressSpace()))
1573 continue;
1574
1575 auto [It, Inserted] = ObjToLastAccess.try_emplace(
1576 {UnderlyingObj,
1577 cast<PointerType>(Ptr->getType())->getAddressSpace()},
1578 Access);
1579 if (!Inserted) {
1580 DepCands.unionSets(Access, It->second);
1581 It->second = Access;
1582 }
1583
1584 LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
1585 }
1586 }
1587 }
1588 }
1589 }
1590}
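// Illustrative sketch, not part of LoopAccessAnalysis.cpp: a toy model of the
// underlying-object grouping performed above, using ints as stand-ins for
// pointers and underlying objects. Accesses that share an underlying object
// end up in the same dependence-candidate group, which is what the
// DepCands.unionSets calls achieve. All names below are hypothetical.
namespace laa_example {
static void groupAccessesByUnderlyingObject() {
  // Access index -> underlying object it is based on.
  std::pair<int, int> AccessToObject[] = {{0, 100}, {1, 100}, {2, 200}};
  llvm::DenseMap<int, int> LastAccessForObject; // object -> last access seen
  llvm::DenseMap<int, int> GroupLeader;         // access -> group leader
  for (auto [Access, Object] : AccessToObject) {
    auto [It, Inserted] = LastAccessForObject.try_emplace(Object, Access);
    // The first access to an object starts a new group; later accesses to the
    // same object join the group of the previous access (cf. unionSets).
    GroupLeader[Access] = Inserted ? Access : GroupLeader[It->second];
    It->second = Access;
  }
  // GroupLeader is now {0:0, 1:0, 2:2}: accesses 0 and 1 must be checked
  // against each other for dependences; access 2 forms its own group.
}
} // namespace laa_example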
1591
1592/// Check whether the access through \p Ptr has a constant stride.
1593std::optional<int64_t>
1594llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
1595 const Loop *Lp,
1596 const DenseMap<Value *, const SCEV *> &StridesMap,
1597 bool Assume, bool ShouldCheckWrap) {
1598 const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
1599 if (PSE.getSE()->isLoopInvariant(PtrScev, Lp))
1600 return 0;
1601
1602 assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
1603
1604 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
1605 if (Assume && !AR)
1606 AR = PSE.getAsAddRec(Ptr);
1607
1608 if (!AR) {
1609 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
1610 << " SCEV: " << *PtrScev << "\n");
1611 return std::nullopt;
1612 }
1613
1614 std::optional<int64_t> Stride =
1615 getStrideFromAddRec(AR, Lp, AccessTy, Ptr, PSE);
1616 if (!ShouldCheckWrap || !Stride)
1617 return Stride;
1618
1619 if (isNoWrap(PSE, AR, Ptr, AccessTy, Lp, Assume, Stride))
1620 return Stride;
1621
1622 LLVM_DEBUG(
1623 dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
1624 << *Ptr << " SCEV: " << *AR << "\n");
1625 return std::nullopt;
1626}
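// Illustrative sketch, not part of LoopAccessAnalysis.cpp: what the returned
// stride means, on plain integers instead of SCEV add-recurrences. For an
// access A[i] over i32 the pointer advances 4 bytes per iteration and the
// stride is 4/4 = 1 element; A[2*i] gives 2; a loop-invariant pointer is
// reported as 0 above. The helper below is hypothetical.
namespace laa_example {
static int64_t strideInElements(int64_t StepBytes, int64_t ElemSizeBytes) {
  // Mirrors the element-sized stride derived from the AddRec step in bytes.
  return StepBytes / ElemSizeBytes; // e.g. {A,+,8} with i32 elements -> 2
}
} // namespace laa_example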
1627
1628std::optional<int64_t> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
1629 Type *ElemTyB, Value *PtrB,
1630 const DataLayout &DL,
1631 ScalarEvolution &SE,
1632 bool StrictCheck, bool CheckType) {
1633 assert(PtrA && PtrB && "Expected non-nullptr pointers.");
1634
1635 // Make sure that A and B are different pointers.
1636 if (PtrA == PtrB)
1637 return 0;
1638
1639 // Make sure that the element types are the same if required.
1640 if (CheckType && ElemTyA != ElemTyB)
1641 return std::nullopt;
1642
1643 unsigned ASA = PtrA->getType()->getPointerAddressSpace();
1644 unsigned ASB = PtrB->getType()->getPointerAddressSpace();
1645
1646 // Check that the address spaces match.
1647 if (ASA != ASB)
1648 return std::nullopt;
1649 unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
1650
1651 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1652 const Value *PtrA1 = PtrA->stripAndAccumulateConstantOffsets(
1653 DL, OffsetA, /*AllowNonInbounds=*/true);
1654 const Value *PtrB1 = PtrB->stripAndAccumulateConstantOffsets(
1655 DL, OffsetB, /*AllowNonInbounds=*/true);
1656
1657 std::optional<int64_t> Val;
1658 if (PtrA1 == PtrB1) {
1659 // Retrieve the address space again as pointer stripping now tracks through
1660 // `addrspacecast`.
1661 ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
1662 ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
1663 // Check that the address spaces match and that the pointers are valid.
1664 if (ASA != ASB)
1665 return std::nullopt;
1666
1667 IdxWidth = DL.getIndexSizeInBits(ASA);
1668 OffsetA = OffsetA.sextOrTrunc(IdxWidth);
1669 OffsetB = OffsetB.sextOrTrunc(IdxWidth);
1670
1671 OffsetB -= OffsetA;
1672 Val = OffsetB.trySExtValue();
1673 } else {
1674 // Otherwise compute the distance with SCEV between the base pointers.
1675 const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
1676 const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
1677 std::optional<APInt> Diff =
1678 SE.computeConstantDifference(PtrSCEVB, PtrSCEVA);
1679 if (!Diff)
1680 return std::nullopt;
1681 Val = Diff->trySExtValue();
1682 }
1683
1684 if (!Val)
1685 return std::nullopt;
1686
1687 int64_t Size = DL.getTypeStoreSize(ElemTyA);
1688 int64_t Dist = *Val / Size;
1689
1690 // Ensure that the calculated distance matches the type-based one after
1691 // stripping bitcasts from the provided pointers.
1692 if (!StrictCheck || Dist * Size == Val)
1693 return Dist;
1694 return std::nullopt;
1695}
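// Illustrative sketch, not part of LoopAccessAnalysis.cpp: the final
// element-distance computation above on plain numbers. With i32 elements
// (store size 4) and a byte difference of 12 between the stripped pointers,
// the strict check accepts a distance of 3 elements; a byte difference of 14
// is not a whole number of elements and is rejected. Hypothetical helper.
namespace laa_example {
static bool elementDistance(int64_t ByteDiff, int64_t StoreSize,
                            int64_t &DistOut) {
  int64_t Dist = ByteDiff / StoreSize;
  if (Dist * StoreSize != ByteDiff)
    return false; // StrictCheck: not a multiple of the element size.
  DistOut = Dist; // e.g. ByteDiff=12, StoreSize=4 -> 3
  return true;
}
} // namespace laa_example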
1696
1697bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
1698 const DataLayout &DL, ScalarEvolution &SE,
1699 SmallVectorImpl<unsigned> &SortedIndices) {
1700 assert(llvm::all_of(
1701 VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
1702 "Expected list of pointer operands.");
1703 // Walk over the pointers, and map each of them to an offset relative to
1704 // the first pointer in the array.
1705 Value *Ptr0 = VL[0];
1706
1707 using DistOrdPair = std::pair<int64_t, unsigned>;
1708 auto Compare = llvm::less_first();
1709 std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
1710 Offsets.emplace(0, 0);
1711 bool IsConsecutive = true;
1712 for (auto [Idx, Ptr] : drop_begin(enumerate(VL))) {
1713 std::optional<int64_t> Diff =
1714 getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
1715 /*StrictCheck=*/true);
1716 if (!Diff)
1717 return false;
1718
1719 // Check if the pointer with the same offset is found.
1720 int64_t Offset = *Diff;
1721 auto [It, IsInserted] = Offsets.emplace(Offset, Idx);
1722 if (!IsInserted)
1723 return false;
1724 // Consecutive order if the inserted element is the last one.
1725 IsConsecutive &= std::next(It) == Offsets.end();
1726 }
1727 SortedIndices.clear();
1728 if (!IsConsecutive) {
1729 // Fill SortedIndices array only if it is non-consecutive.
1730 SortedIndices.resize(VL.size());
1731 for (auto [Idx, Off] : enumerate(Offsets))
1732 SortedIndices[Idx] = Off.second;
1733 }
1734 return true;
1735}
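// Illustrative sketch, not part of LoopAccessAnalysis.cpp: the sorting scheme
// above on plain offsets. Given offsets {0, 8, 4} of three pointers relative
// to the first one, sorting by offset yields the index permutation {0, 2, 1},
// which is what SortedIndices would contain for a non-consecutive input.
namespace laa_example {
static void sortByOffsetExample() {
  // (byte offset relative to the first pointer, original index)
  std::pair<int64_t, unsigned> Offsets[] = {{0, 0}, {8, 1}, {4, 2}};
  std::sort(std::begin(Offsets), std::end(Offsets), llvm::less_first());
  // Offsets is now {0,0}, {4,2}, {8,1}; reading off the second members in
  // order gives 0, 2, 1.
}
} // namespace laa_example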
1736
1737/// Returns true if the memory operations \p A and \p B are consecutive.
1738bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
1739 ScalarEvolution &SE, bool CheckType) {
1740 Value *PtrA = getLoadStorePointerOperand(A);
1741 Value *PtrB = getLoadStorePointerOperand(B);
1742 if (!PtrA || !PtrB)
1743 return false;
1744 Type *ElemTyA = getLoadStoreType(A);
1745 Type *ElemTyB = getLoadStoreType(B);
1746 std::optional<int64_t> Diff =
1747 getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
1748 /*StrictCheck=*/true, CheckType);
1749 return Diff == 1;
1750}
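// Illustrative sketch, not part of LoopAccessAnalysis.cpp: consecutiveness is
// simply an element distance of one. Two i32 accesses at byte offsets 16 and
// 20 are one element apart and consecutive; offsets 16 and 24 are two elements
// apart and are not. Hypothetical helper over plain byte offsets.
namespace laa_example {
static bool consecutiveBytes(int64_t ByteA, int64_t ByteB, int64_t ElemSize) {
  return ByteB - ByteA == ElemSize; // distance of exactly one element
}
} // namespace laa_example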
1751
1752void MemoryDepChecker::addAccess(StoreInst *SI) {
1753 visitPointers(SI->getPointerOperand(), *InnermostLoop,
1754 [this, SI](Value *Ptr) {
1755 Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
1756 InstMap.push_back(SI);
1757 ++AccessIdx;
1758 });
1759}
1760
1761void MemoryDepChecker::addAccess(LoadInst *LI) {
1762 visitPointers(LI->getPointerOperand(), *InnermostLoop,
1763 [this, LI](Value *Ptr) {
1764 Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
1765 InstMap.push_back(LI);
1766 ++AccessIdx;
1767 });
1768}
1769
1770MemoryDepChecker::VectorizationSafetyStatus
1771MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
1772 switch (Type) {
1773 case NoDep:
1774 case Forward:
1775 case BackwardVectorizable:
1776 return VectorizationSafetyStatus::Safe;
1777
1778 case Unknown:
1779 return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
1780 case ForwardButPreventsForwarding:
1781 case Backward:
1782 case BackwardVectorizableButPreventsForwarding:
1783 case IndirectUnsafe:
1784 return VectorizationSafetyStatus::Unsafe;
1785 }
1786 llvm_unreachable("unexpected DepType!");
1787}
1788
1789bool MemoryDepChecker::Dependence::isBackward() const {
1790 switch (Type) {
1791 case NoDep:
1792 case Forward:
1793 case ForwardButPreventsForwarding:
1794 case Unknown:
1795 case IndirectUnsafe:
1796 return false;
1797
1798 case BackwardVectorizable:
1799 case Backward:
1800 case BackwardVectorizableButPreventsForwarding:
1801 return true;
1802 }
1803 llvm_unreachable("unexpected DepType!");
1804}
1805
1806bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
1807 return isBackward() || Type == Unknown || Type == IndirectUnsafe;
1808}
1809
1810bool MemoryDepChecker::Dependence::isForward() const {
1811 switch (Type) {
1812 case Forward:
1813 case ForwardButPreventsForwarding:
1814 return true;
1815
1816 case NoDep:
1817 case Unknown:
1818 case BackwardVectorizable:
1819 case Backward:
1820 case BackwardVectorizableButPreventsForwarding:
1821 case IndirectUnsafe:
1822 return false;
1823 }
1824 llvm_unreachable("unexpected DepType!");
1825}
1826
1827bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1828 uint64_t TypeByteSize,
1829 unsigned CommonStride) {
1830 // If loads occur at a distance that is not a multiple of a feasible vector
1831 // factor, store-load forwarding does not take place.
1832 // Positive dependences might cause trouble because vectorizing them might
1833 // prevent store-load forwarding, making vectorized code run a lot slower.
1834 // a[i] = a[i-3] ^ a[i-8];
1835 // The stores to a[i:i+1] don't align with the loads from a[i-3:i-2], and
1836 // hence on a typical architecture store-load forwarding does not take
1837 // place. Vectorizing in such cases does not make sense.
1838 // Store-load forwarding distance.
1839
1840 // After this many iterations store-to-load forwarding conflicts should not
1841 // cause any slowdowns.
1842 const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1843 // Maximum vector factor.
1844 uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 =
1845 std::min(VectorizerParams::MaxVectorWidth * TypeByteSize,
1846 MaxStoreLoadForwardSafeDistanceInBits);
1847
1848 // Compute the smallest VF at which the store and load would be misaligned.
1849 for (uint64_t VF = 2 * TypeByteSize;
1850 VF <= MaxVFWithoutSLForwardIssuesPowerOf2; VF *= 2) {
1851 // If the number of vector iterations between the store and the load is
1852 // small, we could incur conflicts.
1853 if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1854 MaxVFWithoutSLForwardIssuesPowerOf2 = (VF >> 1);
1855 break;
1856 }
1857 }
1858
1859 if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) {
1860 LLVM_DEBUG(
1861 dbgs() << "LAA: Distance " << Distance
1862 << " that could cause a store-load forwarding conflict\n");
1863 return true;
1864 }
1865
1866 if (CommonStride &&
1867 MaxVFWithoutSLForwardIssuesPowerOf2 <
1868 MaxStoreLoadForwardSafeDistanceInBits &&
1869 MaxVFWithoutSLForwardIssuesPowerOf2 !=
1870 VectorizerParams::MaxVectorWidth * TypeByteSize) {
1871 uint64_t MaxVF =
1872 bit_floor(MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride);
1873 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1874 MaxStoreLoadForwardSafeDistanceInBits =
1875 std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits);
1876 }
1877 return false;
1878}
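// Illustrative sketch, not part of LoopAccessAnalysis.cpp: the VF scan above
// on plain numbers, using the same "8 * TypeByteSize iterations" heuristic.
// For Distance = 24 bytes and TypeByteSize = 4, VF = 8 bytes divides the
// distance evenly, but VF = 16 does not and 24/16 = 1 iteration is far below
// the threshold, so the forwarding-safe VF is capped at 8 bytes (2 elements).
// The helper name and parameters are hypothetical.
namespace laa_example {
static uint64_t maxForwardSafeVFInBytes(uint64_t Distance,
                                        uint64_t TypeByteSize,
                                        uint64_t MaxVFBytes) {
  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFBytes; VF *= 2)
    if (Distance % VF && Distance / VF < 8 * TypeByteSize)
      return VF / 2; // first misaligned-and-close VF caps the safe width
  return MaxVFBytes;
}
} // namespace laa_example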
1879
1880void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
1881 if (Status < S)
1882 Status = S;
1883}
1884
1885/// Given a dependence-distance \p Dist between two memory accesses that have
1886/// strides in the same direction, whose maximum absolute stride is given in
1887/// \p MaxStride, in a loop whose maximum backedge-taken count is \p MaxBTC,
1888/// check whether it is possible to prove statically that the dependence
1889/// distance is larger than the range that the accesses will travel through the
1890/// execution of the loop. If so, return true; otherwise false. This is useful
1891/// for example in loops such as the following (PR31098):
1892///
1893/// for (i = 0; i < D; ++i) {
1894/// = out[i];
1895/// out[i+D] =
1896/// }
1897static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1898 const SCEV &MaxBTC, const SCEV &Dist,
1899 uint64_t MaxStride) {
1900
1901 // If we can prove that
1902 // (**) |Dist| > MaxBTC * Step
1903 // where Step is the absolute stride of the memory accesses in bytes,
1904 // then there is no dependence.
1905 //
1906 // Rationale:
1907 // We basically want to check if the absolute distance (|Dist/Step|)
1908 // is >= the loop iteration count (or > MaxBTC).
1909 // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1910 // Section 4.2.1); Note, that for vectorization it is sufficient to prove
1911 // that the dependence distance is >= VF; This is checked elsewhere.
1912 // But in some cases we can prune dependence distances early, and
1913 // even before selecting the VF, and without a runtime test, by comparing
1914 // the distance against the loop iteration count. Since the vectorized code
1915 // will be executed only if LoopCount >= VF, proving distance >= LoopCount
1916 // also guarantees that distance >= VF.
1917 //
1918 const SCEV *Step = SE.getConstant(MaxBTC.getType(), MaxStride);
1919 const SCEV *Product = SE.getMulExpr(&MaxBTC, Step);
1920
1921 const SCEV *CastedDist = &Dist;
1922 const SCEV *CastedProduct = Product;
1923 uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
1924 uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());
1925
1926 // The dependence distance can be positive/negative, so we sign extend Dist;
1927 // The multiplication of the absolute stride in bytes and the
1928 // backedgeTakenCount is non-negative, so we zero extend Product.
1929 if (DistTypeSizeBits > ProductTypeSizeBits)
1930 CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
1931 else
1932 CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
1933
1934 // Is Dist - (MaxBTC * Step) > 0 ?
1935 // (If so, then we have proven (**) because |Dist| >= Dist)
1936 const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
1937 if (SE.isKnownPositive(Minus))
1938 return true;
1939
1940 // Second try: Is -Dist - (MaxBTC * Step) > 0 ?
1941 // (If so, then we have proven (**) because |Dist| >= -1*Dist)
1942 const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
1943 Minus = SE.getMinusSCEV(NegDist, CastedProduct);
1944 return SE.isKnownPositive(Minus);
1945}
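// Illustrative sketch, not part of LoopAccessAnalysis.cpp: the test (**) above
// on plain 64-bit integers instead of SCEVs. With MaxBTC = 99, a maximum
// stride of 4 bytes and a dependence distance of 400 bytes, 400 > 99 * 4
// holds, so the two accesses can never touch the same location in this loop.
// Hypothetical helper.
namespace laa_example {
static bool safeDependenceDistance(int64_t Dist, uint64_t MaxBTC,
                                   uint64_t StepBytes) {
  uint64_t AbsDist = static_cast<uint64_t>(std::abs(Dist));
  return AbsDist > MaxBTC * StepBytes; // |Dist| > MaxBTC * Step
}
} // namespace laa_example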
1946
1947/// Check the dependence for two accesses with the same stride \p Stride.
1948/// \p Distance is the positive distance in bytes, and \p TypeByteSize is type
1949/// size in bytes.
1950///
1951/// \returns true if they are independent.
1952static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
1953 uint64_t TypeByteSize) {
1954 assert(Stride > 1 && "The stride must be greater than 1");
1955 assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
1956 assert(Distance > 0 && "The distance must be non-zero");
1957
1958 // Skip if the distance is not a multiple of the type byte size.
1959 if (Distance % TypeByteSize)
1960 return false;
1961
1962 // No dependence if the distance is not a multiple of the stride.
1963 // E.g.
1964 // for (i = 0; i < 1024 ; i += 4)
1965 // A[i+2] = A[i] + 1;
1966 //
1967 // Two accesses in memory (distance is 2, stride is 4):
1968 // | A[0] | | | | A[4] | | | |
1969 // | | | A[2] | | | | A[6] | |
1970 //
1971 // E.g.
1972 // for (i = 0; i < 1024 ; i += 3)
1973 // A[i+4] = A[i] + 1;
1974 //
1975 // Two accesses in memory (distance is 4, stride is 3):
1976 // | A[0] | | | A[3] | | | A[6] | | |
1977 // | | | | | A[4] | | | A[7] | |
1978 return Distance % Stride;
1979}
1980
1981bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
1982 Type *SrcTy,
1983 const SCEV *Sink,
1984 Type *SinkTy) {
1985 const SCEV *BTC = PSE.getBackedgeTakenCount();
1986 const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
1987 ScalarEvolution &SE = *PSE.getSE();
1988 const auto &[SrcStart_, SrcEnd_] =
1989 getStartAndEndForAccess(InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
1990 &SE, &PointerBounds, DT, AC);
1991 if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
1992 return false;
1993
1994 const auto &[SinkStart_, SinkEnd_] =
1995 getStartAndEndForAccess(InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
1996 &SE, &PointerBounds, DT, AC);
1997 if (isa<SCEVCouldNotCompute>(SinkStart_) ||
1998 isa<SCEVCouldNotCompute>(SinkEnd_))
1999 return false;
2000
2001 if (!LoopGuards)
2002 LoopGuards.emplace(ScalarEvolution::LoopGuards::collect(InnermostLoop, SE));
2003
2004 auto SrcEnd = SE.applyLoopGuards(SrcEnd_, *LoopGuards);
2005 auto SinkStart = SE.applyLoopGuards(SinkStart_, *LoopGuards);
2006 if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart))
2007 return true;
2008
2009 auto SinkEnd = SE.applyLoopGuards(SinkEnd_, *LoopGuards);
2010 auto SrcStart = SE.applyLoopGuards(SrcStart_, *LoopGuards);
2011 return SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart);
2012}
2013
2014std::variant<MemoryDepChecker::Dependence::DepType,
2015 MemoryDepChecker::DepDistanceStrideAndSizeInfo>
2016MemoryDepChecker::getDependenceDistanceStrideAndSize(
2017 const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B,
2018 Instruction *BInst) {
2019 const auto &DL = InnermostLoop->getHeader()->getDataLayout();
2020 auto &SE = *PSE.getSE();
2021 const auto &[APtr, AIsWrite] = A;
2022 const auto &[BPtr, BIsWrite] = B;
2023
2024 // Two reads are independent.
2025 if (!AIsWrite && !BIsWrite)
2026 return MemoryDepChecker::Dependence::NoDep;
2027
2028 Type *ATy = getLoadStoreType(AInst);
2029 Type *BTy = getLoadStoreType(BInst);
2030
2031 // We cannot check pointers in different address spaces.
2032 if (APtr->getType()->getPointerAddressSpace() !=
2033 BPtr->getType()->getPointerAddressSpace())
2034 return MemoryDepChecker::Dependence::Unknown;
2035
2036 std::optional<int64_t> StrideAPtr =
2037 getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true);
2038 std::optional<int64_t> StrideBPtr =
2039 getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true);
2040
2041 const SCEV *Src = PSE.getSCEV(APtr);
2042 const SCEV *Sink = PSE.getSCEV(BPtr);
2043
2044 // If the induction step is negative we have to invert source and sink of the
2045 // dependence when measuring the distance between them. We should not swap
2046 // AIsWrite with BIsWrite, as their uses expect them in program order.
2047 if (StrideAPtr && *StrideAPtr < 0) {
2048 std::swap(Src, Sink);
2049 std::swap(AInst, BInst);
2050 std::swap(ATy, BTy);
2051 std::swap(StrideAPtr, StrideBPtr);
2052 }
2053
2054 const SCEV *Dist = SE.getMinusSCEV(Sink, Src);
2055
2056 LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
2057 << "\n");
2058 LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
2059 << ": " << *Dist << "\n");
2060
2061 // Need accesses with constant strides and the same direction for further
2062 // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
2063 // similar code or pointer arithmetic that could wrap in the address space.
2064
2065 // If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and
2066 // not loop-invariant (stride will be 0 in that case), we cannot analyze the
2067 // dependence further and also cannot generate runtime checks.
2068 if (!StrideAPtr || !StrideBPtr) {
2069 LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
2070 return MemoryDepChecker::Dependence::IndirectUnsafe;
2071 }
2072
2073 int64_t StrideAPtrInt = *StrideAPtr;
2074 int64_t StrideBPtrInt = *StrideBPtr;
2075 LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt
2076 << " Sink induction step: " << StrideBPtrInt << "\n");
2077 // At least Src or Sink are loop invariant and the other is strided or
2078 // invariant. We can generate a runtime check to disambiguate the accesses.
2079 if (!StrideAPtrInt || !StrideBPtrInt)
2080 return MemoryDepChecker::Dependence::Unknown;
2081
2082 // Both Src and Sink have a constant stride, check if they are in the same
2083 // direction.
2084 if ((StrideAPtrInt > 0) != (StrideBPtrInt > 0)) {
2085 LLVM_DEBUG(
2086 dbgs() << "Pointer access with strides in different directions\n");
2087 return MemoryDepChecker::Dependence::Unknown;
2088 }
2089
2090 TypeSize AStoreSz = DL.getTypeStoreSize(ATy);
2091 TypeSize BStoreSz = DL.getTypeStoreSize(BTy);
2092
2093 // If store sizes are not the same, set TypeByteSize to zero, so we can check
2094 // it in the caller isDependent.
2095 uint64_t ASz = DL.getTypeAllocSize(ATy);
2096 uint64_t BSz = DL.getTypeAllocSize(BTy);
2097 uint64_t TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0;
2098
2099 uint64_t StrideAScaled = std::abs(StrideAPtrInt) * ASz;
2100 uint64_t StrideBScaled = std::abs(StrideBPtrInt) * BSz;
2101
2102 uint64_t MaxStride = std::max(StrideAScaled, StrideBScaled);
2103
2104 std::optional<uint64_t> CommonStride;
2105 if (StrideAScaled == StrideBScaled)
2106 CommonStride = StrideAScaled;
2107
2108 // TODO: Historically, we didn't retry with runtime checks when (unscaled)
2109 // strides were different but there is no inherent reason to.
2110 if (!isa<SCEVConstant>(Dist))
2111 ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt;
2112
2113 // If distance is a SCEVCouldNotCompute, return Unknown immediately.
2114 if (isa<SCEVCouldNotCompute>(Dist)) {
2115 LLVM_DEBUG(dbgs() << "LAA: Uncomputable distance.\n");
2116 return Dependence::Unknown;
2117 }
2118
2119 return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,
2120 TypeByteSize, AIsWrite, BIsWrite);
2121}
2122
2124MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2125 const MemAccessInfo &B, unsigned BIdx) {
2126 assert(AIdx < BIdx && "Must pass arguments in program order");
2127
2128 // Check if we can prove that Sink only accesses memory after Src's end or
2129 // vice versa. The helper is used to perform the checks only on the exit paths
2130 // where it helps to improve the analysis result.
2131 auto CheckCompletelyBeforeOrAfter = [&]() {
2132 auto *APtr = A.getPointer();
2133 auto *BPtr = B.getPointer();
2134 Type *ATy = getLoadStoreType(InstMap[AIdx]);
2135 Type *BTy = getLoadStoreType(InstMap[BIdx]);
2136 const SCEV *Src = PSE.getSCEV(APtr);
2137 const SCEV *Sink = PSE.getSCEV(BPtr);
2138 return areAccessesCompletelyBeforeOrAfter(Src, ATy, Sink, BTy);
2139 };
2140
2141 // Get the dependence distance, stride, type size and what access writes for
2142 // the dependence between A and B.
2143 auto Res =
2144 getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
2145 if (std::holds_alternative<Dependence::DepType>(Res)) {
2146 if (std::get<Dependence::DepType>(Res) == Dependence::Unknown &&
2147 CheckCompletelyBeforeOrAfter())
2148 return Dependence::NoDep;
2149 return std::get<Dependence::DepType>(Res);
2150 }
2151
2152 auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =
2153 std::get<DepDistanceStrideAndSizeInfo>(Res);
2154 bool HasSameSize = TypeByteSize > 0;
2155
2156 ScalarEvolution &SE = *PSE.getSE();
2157 auto &DL = InnermostLoop->getHeader()->getDataLayout();
2158
2159 // If the distance between the accesses is larger than their maximum absolute
2160 // stride multiplied by the symbolic maximum backedge taken count (which is an
2161 // upper bound of the number of iterations), the accesses are independent, i.e.
2162 // they are far enough apart that they won't access the same location
2163 // across all loop iterations.
2164 if (HasSameSize &&
2165 isSafeDependenceDistance(
2166 DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()), *Dist, MaxStride))
2167 return Dependence::NoDep;
2168
2169 // The rest of this function relies on ConstDist being at most 64-bits, which
2170 // is checked earlier. Will assert if the calling code changes.
2171 const APInt *APDist = nullptr;
2172 uint64_t ConstDist =
2173 match(Dist, m_scev_APInt(APDist)) ? APDist->abs().getZExtValue() : 0;
2174
2175 // Attempt to prove strided accesses independent.
2176 if (APDist) {
2177 // If the distance between accesses and their strides are known constants,
2178 // check whether the accesses interlace each other.
2179 if (ConstDist > 0 && CommonStride && CommonStride > 1 && HasSameSize &&
2180 areStridedAccessesIndependent(ConstDist, *CommonStride, TypeByteSize)) {
2181 LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
2182 return Dependence::NoDep;
2183 }
2184 } else {
2185 if (!LoopGuards)
2186 LoopGuards.emplace(
2187 ScalarEvolution::LoopGuards::collect(InnermostLoop, SE));
2188 Dist = SE.applyLoopGuards(Dist, *LoopGuards);
2189 }
2190
2191 // Negative distances are not plausible dependencies.
2192 if (SE.isKnownNonPositive(Dist)) {
2193 if (SE.isKnownNonNegative(Dist)) {
2194 if (HasSameSize) {
2195 // Write to the same location with the same size.
2196 return Dependence::Forward;
2197 }
2198 LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
2199 "different type sizes\n");
2200 return Dependence::Unknown;
2201 }
2202
2203 bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
2204 // Check if the first access writes to a location that is read in a later
2205 // iteration, where the distance between them is not a multiple of a vector
2206 // factor and relatively small.
2207 //
2208 // NOTE: There is no need to update MaxSafeVectorWidthInBits after call to
2209 // couldPreventStoreLoadForward, even if it changed MinDepDistBytes, since a
2210 // forward dependency will allow vectorization using any width.
2211
2212 if (IsTrueDataDependence && EnableForwardingConflictDetection) {
2213 if (!ConstDist) {
2214 return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2215 : Dependence::Unknown;
2216 }
2217 if (!HasSameSize ||
2218 couldPreventStoreLoadForward(ConstDist, TypeByteSize)) {
2219 LLVM_DEBUG(
2220 dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
2221 return Dependence::ForwardButPreventsForwarding;
2222 }
2223 }
2224
2225 LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
2226 return Dependence::Forward;
2227 }
2228
2229 int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue();
2230 // Below we only handle strictly positive distances.
2231 if (MinDistance <= 0) {
2232 return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2233 : Dependence::Unknown;
2234 }
2235
2236 if (!HasSameSize) {
2237 if (CheckCompletelyBeforeOrAfter())
2238 return Dependence::NoDep;
2239 LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
2240 "different type sizes\n");
2241 return Dependence::Unknown;
2242 }
2243 // Bail out early if passed-in parameters make vectorization not feasible.
2244 unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
2245 VectorizerParams::VectorizationFactor : 1);
2246 unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
2247 VectorizerParams::VectorizationInterleave : 1);
2248 // The minimum number of iterations for a vectorized/unrolled version.
2249 unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
2250
2251 // It's not vectorizable if the distance is smaller than the minimum distance
2252 // needed for a vectorized/unrolled version. Vectorizing one iteration in
2253 // front needs MaxStride. Vectorizing the last iteration needs TypeByteSize.
2254 // (There is no need to add the last gap distance.)
2255 //
2256 // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
2257 // foo(int *A) {
2258 // int *B = (int *)((char *)A + 14);
2259 // for (i = 0 ; i < 1024 ; i += 2)
2260 // B[i] = A[i] + 1;
2261 // }
2262 //
2263 // Two accesses in memory (stride is 4 * 2):
2264 // | A[0] | | A[2] | | A[4] | | A[6] | |
2265 // | B[0] | | B[2] | | B[4] |
2266 //
2267 // MinDistance needed for vectorizing iterations except the last iteration:
2268 // 4 * 2 * (MinNumIter - 1). MinDistance needed for the last iteration: 4.
2269 // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
2270 //
2271 // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
2272 // 12, which is less than the distance (14).
2273 //
2274 // If MinNumIter is 4 (say, if a user forces the vectorization factor to be 4),
2275 // the minimum distance needed is 28, which is greater than the distance. It is
2276 // not safe to do vectorization.
2277 //
2278 // We use MaxStride (maximum of src and sink strides) to get a conservative
2279 // lower bound on the MinDistanceNeeded in case of different strides.
2280
2281 // We know that Dist is positive, but it may not be constant. Use the signed
2282 // minimum for computations below, as this ensures we compute the closest
2283 // possible dependence distance.
2284 uint64_t MinDistanceNeeded = MaxStride * (MinNumIter - 1) + TypeByteSize;
2285 if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) {
2286 if (!ConstDist) {
2287 // For non-constant distances, we checked the lower bound of the
2288 // dependence distance and the distance may be larger at runtime (and safe
2289 // for vectorization). Classify it as Unknown, so we re-try with runtime
2290 // checks, unless we can prove both accesses cannot overlap.
2291 return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2292 : Dependence::Unknown;
2293 }
2294 LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance "
2295 << MinDistance << '\n');
2296 return Dependence::Backward;
2297 }
2298
2299 // Unsafe if the minimum distance needed is greater than the smallest
2300 // dependence distance.
2301 if (MinDistanceNeeded > MinDepDistBytes) {
2302 LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
2303 << MinDistanceNeeded << " size in bytes\n");
2304 return Dependence::Backward;
2305 }
2306
2307 MinDepDistBytes =
2308 std::min(static_cast<uint64_t>(MinDistance), MinDepDistBytes);
2309
2310 bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
2311 if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist &&
2312 couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride))
2313 return Dependence::BackwardVectorizableButPreventsForwarding;
2314
2315 uint64_t MaxVF = MinDepDistBytes / MaxStride;
2316 LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
2317 << " with max VF = " << MaxVF << '\n');
2318
2319 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
2320 if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {
2321 // For non-constant distances, we checked the lower bound of the dependence
2322 // distance and the distance may be larger at runtime (and safe for
2323 // vectorization). Classify it as Unknown, so we re-try with runtime checks,
2324 // unless we can prove both accesses cannot overlap.
2325 return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2326 : Dependence::Unknown;
2327 }
2328
2329 if (CheckCompletelyBeforeOrAfter())
2330 return Dependence::NoDep;
2331
2332 MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
2333 return Dependence::BackwardVectorizable;
2334}
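// Illustrative sketch, not part of LoopAccessAnalysis.cpp: the minimum-distance
// bound used above, on the B = (char *)A + 14 example from the comment, where
// MaxStride = 8 bytes and TypeByteSize = 4. With MinNumIter = 2 the bound is
// 8*(2-1)+4 = 12 <= 14, so the dependence is backward but vectorizable; with a
// forced MinNumIter = 4 it is 8*3+4 = 28 > 14 and vectorization is unsafe.
// Hypothetical helper.
namespace laa_example {
static uint64_t minDistanceNeeded(uint64_t MaxStride, uint64_t TypeByteSize,
                                  unsigned MinNumIter) {
  return MaxStride * (MinNumIter - 1) + TypeByteSize;
}
} // namespace laa_example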
2335
2336bool MemoryDepChecker::areDepsSafe(const DepCandidates &DepCands,
2337 const MemAccessInfoList &CheckDeps) {
2338
2339 MinDepDistBytes = -1;
2341 for (MemAccessInfo CurAccess : CheckDeps) {
2342 if (Visited.contains(CurAccess))
2343 continue;
2344
2345 // Check accesses within this set.
2347 DepCands.findLeader(CurAccess);
2349 DepCands.member_end();
2350
2351 // Check every access pair.
2352 while (AI != AE) {
2353 Visited.insert(*AI);
2354 bool AIIsWrite = AI->getInt();
2355 // Check loads only against the next equivalence class, but check stores also
2356 // against other stores in the same equivalence class - to the same address.
2358 (AIIsWrite ? AI : std::next(AI));
2359 while (OI != AE) {
2360 // Check every accessing instruction pair in program order.
2361 auto &Acc = Accesses[*AI];
2362 for (std::vector<unsigned>::iterator I1 = Acc.begin(), I1E = Acc.end();
2363 I1 != I1E; ++I1)
2364 // Scan all accesses of another equivalence class, but only the next
2365 // accesses of the same equivalence class.
2366 for (std::vector<unsigned>::iterator
2367 I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
2368 I2E = (OI == AI ? I1E : Accesses[*OI].end());
2369 I2 != I2E; ++I2) {
2370 auto A = std::make_pair(&*AI, *I1);
2371 auto B = std::make_pair(&*OI, *I2);
2372
2373 assert(*I1 != *I2);
2374 if (*I1 > *I2)
2375 std::swap(A, B);
2376
2377 Dependence::DepType Type =
2378 isDependent(*A.first, A.second, *B.first, B.second);
2379 mergeInStatus(Dependence::isSafeForVectorization(Type));
2380
2381 // Gather dependences unless we accumulated MaxDependences
2382 // dependences. In that case return as soon as we find the first
2383 // unsafe dependence. This puts a limit on this quadratic
2384 // algorithm.
2385 if (RecordDependences) {
2386 if (Type != Dependence::NoDep)
2387 Dependences.emplace_back(A.second, B.second, Type);
2388
2389 if (Dependences.size() >= MaxDependences) {
2390 RecordDependences = false;
2391 Dependences.clear();
2393 << "Too many dependences, stopped recording\n");
2394 }
2395 }
2396 if (!RecordDependences && !isSafeForVectorization())
2397 return false;
2398 }
2399 ++OI;
2400 }
2401 ++AI;
2402 }
2403 }
2404
2405 LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
2406 return isSafeForVectorization();
2407}
2408
2411 MemAccessInfo Access(Ptr, IsWrite);
2412 auto I = Accesses.find(Access);
2414 if (I != Accesses.end()) {
2415 transform(I->second, std::back_inserter(Insts),
2416 [&](unsigned Idx) { return this->InstMap[Idx]; });
2417 }
2418
2419 return Insts;
2420}
2421
2423 "NoDep",
2424 "Unknown",
2425 "IndirectUnsafe",
2426 "Forward",
2427 "ForwardButPreventsForwarding",
2428 "Backward",
2429 "BackwardVectorizable",
2430 "BackwardVectorizableButPreventsForwarding"};
2431
2433 raw_ostream &OS, unsigned Depth,
2434 const SmallVectorImpl<Instruction *> &Instrs) const {
2435 OS.indent(Depth) << DepName[Type] << ":\n";
2436 OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
2437 OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
2438}
2439
2440bool LoopAccessInfo::canAnalyzeLoop() {
2441 // We need to have a loop header.
2442 LLVM_DEBUG(dbgs() << "\nLAA: Checking a loop in '"
2443 << TheLoop->getHeader()->getParent()->getName() << "' from "
2444 << TheLoop->getLocStr() << "\n");
2445
2446 // We can only analyze innermost loops.
2447 if (!TheLoop->isInnermost()) {
2448 LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
2449 recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
2450 return false;
2451 }
2452
2453 // We must have a single backedge.
2454 if (TheLoop->getNumBackEdges() != 1) {
2455 LLVM_DEBUG(
2456 dbgs() << "LAA: loop control flow is not understood by analyzer\n");
2457 recordAnalysis("CFGNotUnderstood")
2458 << "loop control flow is not understood by analyzer";
2459 return false;
2460 }
2461
2462 // ScalarEvolution needs to be able to find the symbolic max backedge taken
2463 // count, which is an upper bound on the number of loop iterations. The loop
2464 // may execute fewer iterations, if it exits via an uncountable exit.
2465 const SCEV *ExitCount = PSE->getSymbolicMaxBackedgeTakenCount();
2466 if (isa<SCEVCouldNotCompute>(ExitCount)) {
2467 recordAnalysis("CantComputeNumberOfIterations")
2468 << "could not determine number of loop iterations";
2469 LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
2470 return false;
2471 }
2472
2473 LLVM_DEBUG(dbgs() << "LAA: Found an analyzable loop: "
2474 << TheLoop->getHeader()->getName() << "\n");
2475 return true;
2476}
2477
2478bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
2479 const TargetLibraryInfo *TLI,
2480 DominatorTree *DT) {
2481 // Holds the Load and Store instructions.
2484 SmallPtrSet<MDNode *, 8> LoopAliasScopes;
2485
2486 // Holds all the different accesses in the loop.
2487 unsigned NumReads = 0;
2488 unsigned NumReadWrites = 0;
2489
2490 bool HasComplexMemInst = false;
2491
2492 // A runtime check is only legal to insert if there are no convergent calls.
2493 HasConvergentOp = false;
2494
2495 PtrRtChecking->Pointers.clear();
2496 PtrRtChecking->Need = false;
2497
2498 const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
2499
2500 const bool EnableMemAccessVersioningOfLoop =
2501 EnableMemAccessVersioning &&
2502 !TheLoop->getHeader()->getParent()->hasOptSize();
2503
2504 // Traverse blocks in fixed RPOT order, regardless of their storage in the
2505 // loop info, as it may be arbitrary.
2506 LoopBlocksRPO RPOT(TheLoop);
2507 RPOT.perform(LI);
2508 for (BasicBlock *BB : RPOT) {
2509 // Scan the BB and collect legal loads and stores. Also detect any
2510 // convergent instructions.
2511 for (Instruction &I : *BB) {
2512 if (auto *Call = dyn_cast<CallBase>(&I)) {
2513 if (Call->isConvergent())
2514 HasConvergentOp = true;
2515 }
2516
2517 // With both a non-vectorizable memory instruction and a convergent
2518 // operation found in this loop, there is no reason to continue the search.
2519 if (HasComplexMemInst && HasConvergentOp)
2520 return false;
2521
2522 // Avoid hitting recordAnalysis multiple times.
2523 if (HasComplexMemInst)
2524 continue;
2525
2526 // Record alias scopes defined inside the loop.
2527 if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
2528 for (Metadata *Op : Decl->getScopeList()->operands())
2529 LoopAliasScopes.insert(cast<MDNode>(Op));
2530
2531 // Many math library functions read the rounding mode. We will only
2532 // vectorize a loop if it contains known function calls that don't set
2533 // the flag. Therefore, it is safe to ignore this read from memory.
2534 auto *Call = dyn_cast<CallInst>(&I);
2535 if (Call && getVectorIntrinsicIDForCall(Call, TLI))
2536 continue;
2537
2538 // If this is a load, save it. If this instruction can read from memory
2539 // but is not a load, we only allow it if it's a call to a function with a
2540 // vector mapping and no pointer arguments.
2541 if (I.mayReadFromMemory()) {
2542 auto hasPointerArgs = [](CallBase *CB) {
2543 return any_of(CB->args(), [](Value const *Arg) {
2544 return Arg->getType()->isPointerTy();
2545 });
2546 };
2547
2548 // If the function has an explicit vectorized counterpart, and does not
2549 // take output/input pointers, we can safely assume that it can be
2550 // vectorized.
2551 if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
2552 !hasPointerArgs(Call) && !VFDatabase::getMappings(*Call).empty())
2553 continue;
2554
2555 auto *Ld = dyn_cast<LoadInst>(&I);
2556 if (!Ld) {
2557 recordAnalysis("CantVectorizeInstruction", Ld)
2558 << "instruction cannot be vectorized";
2559 HasComplexMemInst = true;
2560 continue;
2561 }
2562 if (!Ld->isSimple() && !IsAnnotatedParallel) {
2563 recordAnalysis("NonSimpleLoad", Ld)
2564 << "read with atomic ordering or volatile read";
2565 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
2566 HasComplexMemInst = true;
2567 continue;
2568 }
2569 NumLoads++;
2570 Loads.push_back(Ld);
2571 DepChecker->addAccess(Ld);
2572 if (EnableMemAccessVersioningOfLoop)
2573 collectStridedAccess(Ld);
2574 continue;
2575 }
2576
2577 // Save 'store' instructions. Abort if other instructions write to memory.
2578 if (I.mayWriteToMemory()) {
2579 auto *St = dyn_cast<StoreInst>(&I);
2580 if (!St) {
2581 recordAnalysis("CantVectorizeInstruction", St)
2582 << "instruction cannot be vectorized";
2583 HasComplexMemInst = true;
2584 continue;
2585 }
2586 if (!St->isSimple() && !IsAnnotatedParallel) {
2587 recordAnalysis("NonSimpleStore", St)
2588 << "write with atomic ordering or volatile write";
2589 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
2590 HasComplexMemInst = true;
2591 continue;
2592 }
2593 NumStores++;
2594 Stores.push_back(St);
2595 DepChecker->addAccess(St);
2596 if (EnableMemAccessVersioningOfLoop)
2597 collectStridedAccess(St);
2598 }
2599 } // Next instr.
2600 } // Next block.
2601
2602 if (HasComplexMemInst)
2603 return false;
2604
2605 // Now we have two lists that hold the loads and the stores.
2606 // Next, we find the pointers that they use.
2607
2608 // Check if we see any stores. If there are no stores, then we don't
2609 // care if the pointers are *restrict*.
2610 if (!Stores.size()) {
2611 LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
2612 return true;
2613 }
2614
2616 AccessAnalysis Accesses(TheLoop, AA, LI, DepCands, *PSE, LoopAliasScopes);
2617
2618 // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
2619 // multiple times on the same object. If the ptr is accessed twice, once
2620 // for read and once for write, it will only appear once (on the write
2621 // list). This is okay, since we are going to check for conflicts between
2622 // writes and between reads and writes, but not between reads and reads.
2624
2625 // Record uniform store addresses to identify if we have multiple stores
2626 // to the same address.
2627 SmallPtrSet<Value *, 16> UniformStores;
2628
2629 for (StoreInst *ST : Stores) {
2630 Value *Ptr = ST->getPointerOperand();
2631
2632 if (isInvariant(Ptr)) {
2633 // Record store instructions to loop invariant addresses
2634 StoresToInvariantAddresses.push_back(ST);
2635 HasStoreStoreDependenceInvolvingLoopInvariantAddress |=
2636 !UniformStores.insert(Ptr).second;
2637 }
2638
2639 // If we did *not* see this pointer before, insert it into the read-write
2640 // list. At this phase it is only a 'write' list.
2641 Type *AccessTy = getLoadStoreType(ST);
2642 if (Seen.insert({Ptr, AccessTy}).second) {
2643 ++NumReadWrites;
2644
2645 MemoryLocation Loc = MemoryLocation::get(ST);
2646 // The TBAA metadata could have a control dependency on the predication
2647 // condition, so we cannot rely on it when determining whether or not we
2648 // need runtime pointer checks.
2649 if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
2650 Loc.AATags.TBAA = nullptr;
2651
2652 visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2653 [&Accesses, AccessTy, Loc](Value *Ptr) {
2654 MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2655 Accesses.addStore(NewLoc, AccessTy);
2656 });
2657 }
2658 }
2659
2660 if (IsAnnotatedParallel) {
2661 LLVM_DEBUG(
2662 dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
2663 << "checks.\n");
2664 return true;
2665 }
2666
2667 for (LoadInst *LD : Loads) {
2668 Value *Ptr = LD->getPointerOperand();
2669 // If we did *not* see this pointer before, insert it into the
2670 // read list. If we *did* see it before, then it is already in
2671 // the read-write list. This allows us to vectorize expressions
2672 // such as A[i] += x; Because the address of A[i] is a read-write
2673 // pointer. This only works if the index of A[i] is consecutive.
2674 // If the address of i is unknown (for example A[B[i]]) then we may
2675 // read a few words, modify, and write a few words, and some of the
2676 // words may be written to the same address.
2677 bool IsReadOnlyPtr = false;
2678 Type *AccessTy = getLoadStoreType(LD);
2679 if (Seen.insert({Ptr, AccessTy}).second ||
2680 !getPtrStride(*PSE, AccessTy, Ptr, TheLoop, SymbolicStrides)) {
2681 ++NumReads;
2682 IsReadOnlyPtr = true;
2683 }
2684
2685 // See if there is an unsafe dependency between a load from a uniform address
2686 // and a store to the same uniform address.
2687 if (UniformStores.contains(Ptr)) {
2688 LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
2689 "load and uniform store to the same address!\n");
2690 HasLoadStoreDependenceInvolvingLoopInvariantAddress = true;
2691 }
2692
2693 MemoryLocation Loc = MemoryLocation::get(LD);
2694 // The TBAA metadata could have a control dependency on the predication
2695 // condition, so we cannot rely on it when determining whether or not we
2696 // need runtime pointer checks.
2697 if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
2698 Loc.AATags.TBAA = nullptr;
2699
2700 visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2701 [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
2702 MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2703 Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
2704 });
2705 }
2706
2707 // If we write (or read-write) to a single destination and there are no
2708 // other reads in this loop, then it is safe to vectorize.
2709 if (NumReadWrites == 1 && NumReads == 0) {
2710 LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
2711 return true;
2712 }
2713
2714 // Build dependence sets and check whether we need a runtime pointer bounds
2715 // check.
2716 Accesses.buildDependenceSets();
2717
2718 // Find pointers with computable bounds. We are going to use this information
2719 // to place a runtime bound check.
2720 Value *UncomputablePtr = nullptr;
2721 HasCompletePtrRtChecking = Accesses.canCheckPtrAtRT(
2722 *PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr, AllowPartial);
2723 if (!HasCompletePtrRtChecking) {
2724 const auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
2725 recordAnalysis("CantIdentifyArrayBounds", I)
2726 << "cannot identify array bounds";
2727 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
2728 << "the array bounds.\n");
2729 return false;
2730 }
2731
2732 LLVM_DEBUG(
2733 dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
2734
2735 bool DepsAreSafe = true;
2736 if (Accesses.isDependencyCheckNeeded()) {
2737 LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
2738 DepsAreSafe =
2739 DepChecker->areDepsSafe(DepCands, Accesses.getDependenciesToCheck());
2740
2741 if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeChecks()) {
2742 LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
2743
2744 // Clear the dependency checks. We assume they are not needed.
2745 Accesses.resetDepChecks(*DepChecker);
2746
2747 PtrRtChecking->reset();
2748 PtrRtChecking->Need = true;
2749
2750 UncomputablePtr = nullptr;
2751 HasCompletePtrRtChecking =
2752 Accesses.canCheckPtrAtRT(*PtrRtChecking, TheLoop, SymbolicStrides,
2753 UncomputablePtr, AllowPartial);
2754
2755 // Check that we found the bounds for the pointer.
2756 if (!HasCompletePtrRtChecking) {
2757 auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
2758 recordAnalysis("CantCheckMemDepsAtRunTime", I)
2759 << "cannot check memory dependencies at runtime";
2760 LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
2761 return false;
2762 }
2763 DepsAreSafe = true;
2764 }
2765 }
2766
2767 if (HasConvergentOp) {
2768 recordAnalysis("CantInsertRuntimeCheckWithConvergent")
2769 << "cannot add control dependency to convergent operation";
2770 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
2771 "would be needed with a convergent operation\n");
2772 return false;
2773 }
2774
2775 if (DepsAreSafe) {
2776 LLVM_DEBUG(
2777 dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
2778 << (PtrRtChecking->Need ? "" : " don't")
2779 << " need runtime memory checks.\n");
2780 return true;
2781 }
2782
2783 emitUnsafeDependenceRemark();
2784 return false;
2785}
2786
2787void LoopAccessInfo::emitUnsafeDependenceRemark() {
2788 const auto *Deps = getDepChecker().getDependences();
2789 if (!Deps)
2790 return;
2791 const auto *Found =
2792 llvm::find_if(*Deps, [](const MemoryDepChecker::Dependence &D) {
2793 return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) !=
2794 MemoryDepChecker::VectorizationSafetyStatus::Safe;
2795 });
2796 if (Found == Deps->end())
2797 return;
2798 MemoryDepChecker::Dependence Dep = *Found;
2799
2800 LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2801
2802 // Emit remark for first unsafe dependence
2803 bool HasForcedDistribution = false;
2804 std::optional<const MDOperand *> Value =
2805 findStringMetadataForLoop(TheLoop, "llvm.loop.distribute.enable");
2806 if (Value) {
2807 const MDOperand *Op = *Value;
2808 assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
2809 HasForcedDistribution = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
2810 }
2811
2812 const std::string Info =
2813 HasForcedDistribution
2814 ? "unsafe dependent memory operations in loop."
2815 : "unsafe dependent memory operations in loop. Use "
2816 "#pragma clang loop distribute(enable) to allow loop distribution "
2817 "to attempt to isolate the offending operations into a separate "
2818 "loop";
2820 recordAnalysis("UnsafeDep", Dep.getDestination(getDepChecker())) << Info;
2821
2822 switch (Dep.Type) {
2823 case MemoryDepChecker::Dependence::NoDep:
2824 case MemoryDepChecker::Dependence::Forward:
2825 case MemoryDepChecker::Dependence::BackwardVectorizable:
2826 llvm_unreachable("Unexpected dependence");
2827 case MemoryDepChecker::Dependence::Backward:
2828 R << "\nBackward loop carried data dependence.";
2829 break;
2830 case MemoryDepChecker::Dependence::ForwardButPreventsForwarding:
2831 R << "\nForward loop carried data dependence that prevents "
2832 "store-to-load forwarding.";
2833 break;
2834 case MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding:
2835 R << "\nBackward loop carried data dependence that prevents "
2836 "store-to-load forwarding.";
2837 break;
2838 case MemoryDepChecker::Dependence::IndirectUnsafe:
2839 R << "\nUnsafe indirect dependence.";
2840 break;
2841 case MemoryDepChecker::Dependence::Unknown:
2842 R << "\nUnknown data dependence.";
2843 break;
2844 }
2845
2846 if (Instruction *I = Dep.getSource(getDepChecker())) {
2847 DebugLoc SourceLoc = I->getDebugLoc();
2848 if (auto *DD = dyn_cast_or_null<Instruction>(getPointerOperand(I)))
2849 SourceLoc = DD->getDebugLoc();
2850 if (SourceLoc)
2851 R << " Memory location is the same as accessed at "
2852 << ore::NV("Location", SourceLoc);
2853 }
2854}
2855
2857 DominatorTree *DT) {
2858 assert(TheLoop->contains(BB) && "Unknown block used");
2859
2860 // Blocks that do not dominate the latch need predication.
2861 const BasicBlock *Latch = TheLoop->getLoopLatch();
2862 return !DT->dominates(BB, Latch);
2863}
2864
2865OptimizationRemarkAnalysis &
2866LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
2867 assert(!Report && "Multiple reports generated");
2868
2869 const BasicBlock *CodeRegion = TheLoop->getHeader();
2870 DebugLoc DL = TheLoop->getStartLoc();
2871
2872 if (I) {
2873 CodeRegion = I->getParent();
2874 // If there is no debug location attached to the instruction, fall back to
2875 // using the loop's.
2876 if (I->getDebugLoc())
2877 DL = I->getDebugLoc();
2878 }
2879
2880 Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName,
2881 DL, CodeRegion);
2882 return *Report;
2883}
2884
2885bool LoopAccessInfo::isInvariant(Value *V) const {
2886 auto *SE = PSE->getSE();
2887 if (TheLoop->isLoopInvariant(V))
2888 return true;
2889 if (!SE->isSCEVable(V->getType()))
2890 return false;
2891 const SCEV *S = SE->getSCEV(V);
2892 return SE->isLoopInvariant(S, TheLoop);
2893}
2894
2895/// If \p Ptr is a GEP, which has a loop-variant operand, return that operand.
2896/// Otherwise, return \p Ptr.
2898 Loop *Lp) {
2899 auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
2900 if (!GEP)
2901 return Ptr;
2902
2903 Value *V = Ptr;
2904 for (const Use &U : GEP->operands()) {
2905 if (!SE->isLoopInvariant(SE->getSCEV(U), Lp)) {
2906 if (V == Ptr)
2907 V = U;
2908 else
2909 // There must be exactly one loop-variant operand.
2910 return Ptr;
2911 }
2912 }
2913 return V;
2914}
2915
2916/// Get the stride of a pointer access in a loop. Looks for symbolic
2917/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
2919 auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
2920 if (!PtrTy)
2921 return nullptr;
2922
2923 // Try to remove a gep instruction to make the pointer (actually the index at
2924 // this point) easier to analyze. If OrigPtr is equal to Ptr we are analyzing
2925 // the pointer, otherwise we are analyzing the index.
2926 Value *OrigPtr = Ptr;
2927
2928 Ptr = getLoopVariantGEPOperand(Ptr, SE, Lp);
2929 const SCEV *V = SE->getSCEV(Ptr);
2930
2931 if (Ptr != OrigPtr)
2932 // Strip off casts.
2933 while (auto *C = dyn_cast<SCEVIntegralCastExpr>(V))
2934 V = C->getOperand();
2935
2937 return nullptr;
2938
2939 // Note that the restrictions after this loop-invariant check are only
2940 // profitability restrictions.
2941 if (!SE->isLoopInvariant(V, Lp))
2942 return nullptr;
2943
2944 // Look for the loop invariant symbolic value.
2945 if (isa<SCEVUnknown>(V))
2946 return V;
2947
2948 if (auto *C = dyn_cast<SCEVIntegralCastExpr>(V))
2949 if (isa<SCEVUnknown>(C->getOperand()))
2950 return V;
2951
2952 return nullptr;
2953}
2954
2955void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2956 Value *Ptr = getLoadStorePointerOperand(MemAccess);
2957 if (!Ptr)
2958 return;
2959
2960 // Note: getStrideFromPointer is a *profitability* heuristic. We
2961 // could broaden the scope of values returned here - to anything
2962 // which happens to be loop invariant and contributes to the
2963 // computation of an interesting IV - but we chose not to as we
2964 // don't have a cost model here, and broadening the scope exposes
2965 // far too many unprofitable cases.
2966 const SCEV *StrideExpr = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2967 if (!StrideExpr)
2968 return;
2969
2970 LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2971 "versioning:");
2972 LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *StrideExpr << "\n");
2973
2974 if (!SpeculateUnitStride) {
2975 LLVM_DEBUG(dbgs() << " Chose not to due to -laa-speculate-unit-stride\n");
2976 return;
2977 }
2978
2979 // Avoid adding the "Stride == 1" predicate when we know that
2980 // Stride >= Trip-Count. Such a predicate will effectively optimize a single
2981 // or zero iteration loop, as Trip-Count <= Stride == 1.
2982 //
2983 // TODO: We are currently not making a very informed decision on when it is
2984 // beneficial to apply stride versioning. It might make more sense that the
2985 // users of this analysis (such as the vectorizer) will trigger it, based on
2986 // their specific cost considerations; For example, in cases where stride
2987 // versioning does not help resolving memory accesses/dependences, the
2988 // vectorizer should evaluate the cost of the runtime test, and the benefit
2989 // of various possible stride specializations, considering the alternatives
2990 // of using gather/scatters (if available).
2991
2992 const SCEV *MaxBTC = PSE->getSymbolicMaxBackedgeTakenCount();
2993
2994 // Match the types so we can compare the stride and the MaxBTC.
2995 // The Stride can be positive/negative, so we sign extend Stride;
2996 // The backedgeTakenCount is non-negative, so we zero extend MaxBTC.
2997 const DataLayout &DL = TheLoop->getHeader()->getDataLayout();
2998 uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
2999 uint64_t BETypeSizeBits = DL.getTypeSizeInBits(MaxBTC->getType());
3000 const SCEV *CastedStride = StrideExpr;
3001 const SCEV *CastedBECount = MaxBTC;
3002 ScalarEvolution *SE = PSE->getSE();
3003 if (BETypeSizeBits >= StrideTypeSizeBits)
3004 CastedStride = SE->getNoopOrSignExtend(StrideExpr, MaxBTC->getType());
3005 else
3006 CastedBECount = SE->getZeroExtendExpr(MaxBTC, StrideExpr->getType());
3007 const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
3008 // Since TripCount == BackEdgeTakenCount + 1, checking:
3009 // "Stride >= TripCount" is equivalent to checking:
3010 // Stride - MaxBTC > 0
3011 if (SE->isKnownPositive(StrideMinusBETaken)) {
3012 LLVM_DEBUG(
3013 dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
3014 "Stride==1 predicate will imply that the loop executes "
3015 "at most once.\n");
3016 return;
3017 }
3018 LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
3019
3020 // Strip back off the integer cast, and check that our result is a
3021 // SCEVUnknown as we expect.
3022 const SCEV *StrideBase = StrideExpr;
3023 if (const auto *C = dyn_cast<SCEVIntegralCastExpr>(StrideBase))
3024 StrideBase = C->getOperand();
3025 SymbolicStrides[Ptr] = cast<SCEVUnknown>(StrideBase);
3026}
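// Illustrative sketch, not part of LoopAccessAnalysis.cpp: the profitability
// check above on plain integers. With a symbolic maximum backedge-taken count
// of 7 (trip count 8) and a symbolic stride of 8, Stride - MaxBTC = 1 > 0, so
// a "Stride == 1" predicate would only cover a loop that runs at most once
// and versioning is skipped. Hypothetical helper.
namespace laa_example {
static bool strideVersioningPointless(int64_t Stride, int64_t MaxBTC) {
  return Stride - MaxBTC > 0; // Stride >= TripCount == MaxBTC + 1
}
} // namespace laa_example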
3027
3029 const TargetTransformInfo *TTI,
3030 const TargetLibraryInfo *TLI, AAResults *AA,
3031 DominatorTree *DT, LoopInfo *LI,
3032 AssumptionCache *AC, bool AllowPartial)
3033 : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
3034 PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) {
3035 unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
3037 // Scale the vector width by 2 as rough estimate to also consider
3038 // interleaving.
3039 MaxTargetVectorWidthInBits =
3041
3042 DepChecker = std::make_unique<MemoryDepChecker>(
3043 *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
3044 PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
3045 if (canAnalyzeLoop())
3046 CanVecMem = analyzeLoop(AA, LI, TLI, DT);
3047}
3048
3050 if (CanVecMem) {
3051 OS.indent(Depth) << "Memory dependences are safe";
3052 const MemoryDepChecker &DC = getDepChecker();
3053 if (!DC.isSafeForAnyVectorWidth())
3054 OS << " with a maximum safe vector width of "
3055 << DC.getMaxSafeVectorWidthInBits() << " bits";
3058 OS << ", with a maximum safe store-load forward width of " << SLDist
3059 << " bits";
3060 }
3061 if (PtrRtChecking->Need)
3062 OS << " with run-time checks";
3063 OS << "\n";
3064 }
3065
3066 if (HasConvergentOp)
3067 OS.indent(Depth) << "Has convergent operation in loop\n";
3068
3069 if (Report)
3070 OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
3071
3072 if (auto *Dependences = DepChecker->getDependences()) {
3073 OS.indent(Depth) << "Dependences:\n";
3074 for (const auto &Dep : *Dependences) {
3075 Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
3076 OS << "\n";
3077 }
3078 } else
3079 OS.indent(Depth) << "Too many dependences, not recorded\n";
3080
3081 // List the pairs of accesses that need run-time checks to prove independence.
3082 PtrRtChecking->print(OS, Depth);
3083 if (PtrRtChecking->Need && !HasCompletePtrRtChecking)
3084 OS.indent(Depth) << "Generated run-time checks are incomplete\n";
3085 OS << "\n";
3086
3087 OS.indent(Depth)
3088 << "Non vectorizable stores to invariant address were "
3089 << (HasStoreStoreDependenceInvolvingLoopInvariantAddress ||
3090 HasLoadStoreDependenceInvolvingLoopInvariantAddress
3091 ? ""
3092 : "not ")
3093 << "found in loop.\n";
3094
3095 OS.indent(Depth) << "SCEV assumptions:\n";
3096 PSE->getPredicate().print(OS, Depth);
3097
3098 OS << "\n";
3099
3100 OS.indent(Depth) << "Expressions re-written:\n";
3101 PSE->print(OS, Depth);
3102}
3103
3104const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L,
3105 bool AllowPartial) {
3106 const auto &[It, Inserted] = LoopAccessInfoMap.try_emplace(&L);
3107
3108 // We need to create the LoopAccessInfo if either we don't already have one,
3109 // or if it was created with a different value of AllowPartial.
3110 if (Inserted || It->second->hasAllowPartial() != AllowPartial)
3111 It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
3112 &LI, AC, AllowPartial);
3113
3114 return *It->second;
3115}
3116void LoopAccessInfoManager::clear() {
3117 // Collect LoopAccessInfo entries that may keep references to IR outside the
3118 // analyzed loop or SCEVs that may have been modified or invalidated. At the
3119 // moment, that is loops requiring memory or SCEV runtime checks, as those cache
3120 // SCEVs, e.g. for pointer expressions.
3121 for (const auto &[L, LAI] : LoopAccessInfoMap) {
3122 if (LAI->getRuntimePointerChecking()->getChecks().empty() &&
3123 LAI->getPSE().getPredicate().isAlwaysTrue())
3124 continue;
3125 LoopAccessInfoMap.erase(L);
3126 }
3127}
3128
3130 Function &F, const PreservedAnalyses &PA,
3132 // Check whether our analysis is preserved.
3133 auto PAC = PA.getChecker<LoopAccessAnalysis>();
3134 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
3135 // If not, give up now.
3136 return true;
3137
3138 // Check whether the analyses we depend on became invalid for any reason.
3139 // Skip checking TargetLibraryAnalysis as it is immutable and can't become
3140 // invalid.
3141 return Inv.invalidate<AAManager>(F, PA) ||
3143 Inv.invalidate<LoopAnalysis>(F, PA) ||
3145}
3146
3150 auto &AA = FAM.getResult<AAManager>(F);
3151 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
3152 auto &LI = FAM.getResult<LoopAnalysis>(F);
3153 auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
3154 auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
3155 auto &AC = FAM.getResult<AssumptionAnalysis>(F);
3156 return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI, &AC);
3157}
3158
3159AnalysisKey LoopAccessAnalysis::Key;
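
run() assembles the manager from the standard function-level analyses, and the AnalysisKey above is what identifies LoopAccessAnalysis to the new pass manager. A minimal standalone setup sketch, assuming the usual PassBuilder registration boilerplate; the driver function and variable names are illustrative:

  #include "llvm/Analysis/CGSCCPassManager.h"
  #include "llvm/Analysis/LoopAccessAnalysis.h"
  #include "llvm/Analysis/LoopAnalysisManager.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/PassManager.h"
  #include "llvm/Passes/PassBuilder.h"

  // Illustrative standalone driver: register the built-in analyses once, then
  // lazily compute LoopAccessAnalysis (which invokes run() above) on demand.
  void analyzeLoopsIn(llvm::Function &F) {
    llvm::PassBuilder PB;
    llvm::LoopAnalysisManager LAM;
    llvm::FunctionAnalysisManager FAM;
    llvm::CGSCCAnalysisManager CGAM;
    llvm::ModuleAnalysisManager MAM;
    PB.registerModuleAnalyses(MAM);
    PB.registerCGSCCAnalyses(CGAM);
    PB.registerFunctionAnalyses(FAM);
    PB.registerLoopAnalyses(LAM);
    PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

    llvm::LoopAccessInfoManager &LAIs =
        FAM.getResult<llvm::LoopAccessAnalysis>(F);
    (void)LAIs; // query individual loops via LAIs.getInfo(*L)
  }
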