LLVM 21.0.0git
LoopAccessAnalysis.h
Go to the documentation of this file.
1//===- llvm/Analysis/LoopAccessAnalysis.h -----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interface for the loop memory dependence framework that
10// was originally developed for the Loop Vectorizer.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_LOOPACCESSANALYSIS_H
15#define LLVM_ANALYSIS_LOOPACCESSANALYSIS_H
16
20#include <optional>
21#include <variant>
22
23namespace llvm {
24
25class AAResults;
26class DataLayout;
27class Loop;
28class raw_ostream;
29class TargetTransformInfo;
30
31/// Collection of parameters shared between the Loop Vectorizer and the
32/// Loop Access Analysis.
34 /// Maximum SIMD width.
35 static const unsigned MaxVectorWidth;
36
37 /// VF as overridden by the user.
38 static unsigned VectorizationFactor;
39 /// Interleave factor as overridden by the user.
40 static unsigned VectorizationInterleave;
41 /// True if force-vector-interleave was specified by the user.
42 static bool isInterleaveForced();
43
44 /// When performing memory disambiguation checks at runtime do not
45 /// make more than this number of comparisons.
47
48 // When creating runtime checks for nested loops, where possible try to
49 // write the checks in a form that allows them to be easily hoisted out of
50 // the outermost loop. For example, we can do this by expanding the range of
51 // addresses considered to include the entire nested loop so that they are
52 // loop invariant.
53 static bool HoistRuntimeChecks;
54};
55
56/// Checks memory dependences among accesses to the same underlying
57/// object to determine whether vectorization is legal or not (and at
58/// which vectorization factor).
59///
60/// Note: This class will compute a conservative dependence for access to
61/// different underlying pointers. Clients, such as the loop vectorizer, will
62/// sometimes deal with these potential dependencies by emitting runtime checks.
63///
64/// We use the ScalarEvolution framework to symbolically evaluate access
65/// functions pairs. Since we currently don't restructure the loop we can rely
66/// on the program order of memory accesses to determine their safety.
67/// At the moment we will only deem accesses as safe for:
68/// * A negative constant distance assuming program order.
69///
70/// Safe: tmp = a[i + 1]; OR a[i + 1] = x;
71/// a[i] = tmp; y = a[i];
72///
73/// The latter case is safe because later checks guarantee that there can't
74/// be a cycle through a phi node (that is, we check that "x" and "y" is not
75/// the same variable: a header phi can only be an induction or a reduction, a
76/// reduction can't have a memory sink, an induction can't have a memory
77/// source). This is important and must not be violated (or we have to
78/// resort to checking for cycles through memory).
79///
80/// * A positive constant distance assuming program order that is bigger
81/// than the biggest memory access.
82///
83/// tmp = a[i] OR b[i] = x
84/// a[i+2] = tmp y = b[i+2];
85///
86/// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
87///
88/// * Zero distances and all accesses have the same size.
89///
91public:
94 /// Set of potential dependent memory accesses.
96
97 /// Type to keep track of the status of the dependence check. The order of
98 /// the elements is important and has to be from most permissive to least
99 /// permissive.
101 // Can vectorize safely without RT checks. All dependences are known to be
102 // safe.
103 Safe,
104 // Can possibly vectorize with RT checks to overcome unknown dependencies.
106 // Cannot vectorize due to known unsafe dependencies.
107 Unsafe,
108 };
109
110 /// Dependence between memory access instructions.
111 struct Dependence {
112 /// The type of the dependence.
113 enum DepType {
114 // No dependence.
116 // We couldn't determine the direction or the distance.
118 // At least one of the memory access instructions may access a loop
119 // varying object, e.g. the address of underlying object is loaded inside
120 // the loop, like A[B[i]]. We cannot determine direction or distance in
121 // those cases, and also are unable to generate any runtime checks.
123
124 // Lexically forward.
125 //
126 // FIXME: If we only have loop-independent forward dependences (e.g. a
127 // read and write of A[i]), LAA will locally deem the dependence "safe"
128 // without querying the MemoryDepChecker. Therefore we can miss
129 // enumerating loop-independent forward dependences in
130 // getDependences. Note that as soon as there are different
131 // indices used to access the same array, the MemoryDepChecker *is*
132 // queried and the dependence list is complete.
134 // Forward, but if vectorized, is likely to prevent store-to-load
135 // forwarding.
137 // Lexically backward.
139 // Backward, but the distance allows a vectorization factor of dependent
140 // on MinDepDistBytes.
142 // Same, but may prevent store-to-load forwarding.
144 };
145
146 /// String version of the types.
147 static const char *DepName[];
148
149 /// Index of the source of the dependence in the InstMap vector.
150 unsigned Source;
151 /// Index of the destination of the dependence in the InstMap vector.
152 unsigned Destination;
153 /// The type of the dependence.
155
158
159 /// Return the source instruction of the dependence.
160 Instruction *getSource(const MemoryDepChecker &DepChecker) const;
161 /// Return the destination instruction of the dependence.
162 Instruction *getDestination(const MemoryDepChecker &DepChecker) const;
163
164 /// Dependence types that don't prevent vectorization.
166
167 /// Lexically forward dependence.
168 bool isForward() const;
169 /// Lexically backward dependence.
170 bool isBackward() const;
171
172 /// May be a lexically backward dependence type (includes Unknown).
173 bool isPossiblyBackward() const;
174
175 /// Print the dependence. \p Instr is used to map the instruction
176 /// indices to instructions.
177 void print(raw_ostream &OS, unsigned Depth,
178 const SmallVectorImpl<Instruction *> &Instrs) const;
179 };
180
182 const DenseMap<Value *, const SCEV *> &SymbolicStrides,
183 unsigned MaxTargetVectorWidthInBits)
184 : PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides),
185 MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {}
186
187 /// Register the location (instructions are given increasing numbers)
188 /// of a write access.
189 void addAccess(StoreInst *SI);
190
191 /// Register the location (instructions are given increasing numbers)
192 /// of a write access.
193 void addAccess(LoadInst *LI);
194
195 /// Check whether the dependencies between the accesses are safe.
196 ///
197 /// Only checks sets with elements in \p CheckDeps.
198 bool areDepsSafe(const DepCandidates &AccessSets,
199 const MemAccessInfoList &CheckDeps);
200
201 /// No memory dependence was encountered that would inhibit
202 /// vectorization.
205 }
206
207 /// Return true if the number of elements that are safe to operate on
208 /// simultaneously is not bounded.
210 return MaxSafeVectorWidthInBits == UINT_MAX;
211 }
212
213 /// Return the number of elements that are safe to operate on
214 /// simultaneously, multiplied by the size of the element in bits.
216 return MaxSafeVectorWidthInBits;
217 }
218
219 /// In some cases when the dependency check fails we can still
220 /// vectorize the loop with a dynamic array access check.
222 return FoundNonConstantDistanceDependence &&
224 }
225
226 /// Returns the memory dependences. If null is returned we exceeded
227 /// the MaxDependences threshold and this information is not
228 /// available.
230 return RecordDependences ? &Dependences : nullptr;
231 }
232
233 void clearDependences() { Dependences.clear(); }
234
235 /// The vector of memory access instructions. The indices are used as
236 /// instruction identifiers in the Dependence class.
238 return InstMap;
239 }
240
241 /// Generate a mapping between the memory instructions and their
242 /// indices according to program order.
245
246 for (unsigned I = 0; I < InstMap.size(); ++I)
247 OrderMap[InstMap[I]] = I;
248
249 return OrderMap;
250 }
251
252 /// Find the set of instructions that read or write via \p Ptr.
254 bool isWrite) const;
255
256 /// Return the program order indices for the access location (Ptr, IsWrite).
257 /// Returns an empty ArrayRef if there are no accesses for the location.
259 auto I = Accesses.find({Ptr, IsWrite});
260 if (I != Accesses.end())
261 return I->second;
262 return {};
263 }
264
265 const Loop *getInnermostLoop() const { return InnermostLoop; }
266
268 std::pair<const SCEV *, const SCEV *>> &
270 return PointerBounds;
271 }
272
273private:
274 /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
275 /// applies dynamic knowledge to simplify SCEV expressions and convert them
276 /// to a more usable form. We need this in case assumptions about SCEV
277 /// expressions need to be made in order to avoid unknown dependences. For
278 /// example we might assume a unit stride for a pointer in order to prove
279 /// that a memory access is strided and doesn't wrap.
281 const Loop *InnermostLoop;
282
283 /// Reference to map of pointer values to
284 /// their stride symbols, if they have a symbolic stride.
285 const DenseMap<Value *, const SCEV *> &SymbolicStrides;
286
287 /// Maps access locations (ptr, read/write) to program order.
289
290 /// Memory access instructions in program order.
292
293 /// The program order index to be used for the next instruction.
294 unsigned AccessIdx = 0;
295
296 /// The smallest dependence distance in bytes in the loop. This may not be
297 /// the same as the maximum number of bytes that are safe to operate on
298 /// simultaneously.
299 uint64_t MinDepDistBytes = 0;
300
301 /// Number of elements (from consecutive iterations) that are safe to
302 /// operate on simultaneously, multiplied by the size of the element in bits.
303 /// The size of the element is taken from the memory access that is most
304 /// restrictive.
305 uint64_t MaxSafeVectorWidthInBits = -1U;
306
307 /// If we see a non-constant dependence distance we can still try to
308 /// vectorize this loop with runtime checks.
309 bool FoundNonConstantDistanceDependence = false;
310
311 /// Result of the dependence checks, indicating whether the checked
312 /// dependences are safe for vectorization, require RT checks or are known to
313 /// be unsafe.
315
316 //// True if Dependences reflects the dependences in the
317 //// loop. If false we exceeded MaxDependences and
318 //// Dependences is invalid.
319 bool RecordDependences = true;
320
321 /// Memory dependences collected during the analysis. Only valid if
322 /// RecordDependences is true.
323 SmallVector<Dependence, 8> Dependences;
324
325 /// The maximum width of a target's vector registers multiplied by 2 to also
326 /// roughly account for additional interleaving. Is used to decide if a
327 /// backwards dependence with non-constant stride should be classified as
328 /// backwards-vectorizable or unknown (triggering a runtime check).
329 unsigned MaxTargetVectorWidthInBits = 0;
330
331 /// Mapping of SCEV expressions to their expanded pointer bounds (pair of
332 /// start and end pointer expressions).
334 std::pair<const SCEV *, const SCEV *>>
336
337 /// Cache for the loop guards of InnermostLoop.
338 std::optional<ScalarEvolution::LoopGuards> LoopGuards;
339
340 /// Check whether there is a plausible dependence between the two
341 /// accesses.
342 ///
343 /// Access \p A must happen before \p B in program order. The two indices
344 /// identify the index into the program order map.
345 ///
346 /// This function checks whether there is a plausible dependence (or the
347 /// absence of such can't be proved) between the two accesses. If there is a
348 /// plausible dependence but the dependence distance is bigger than one
349 /// element access it records this distance in \p MinDepDistBytes (if this
350 /// distance is smaller than any other distance encountered so far).
351 /// Otherwise, this function returns true signaling a possible dependence.
352 Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx,
353 const MemAccessInfo &B, unsigned BIdx);
354
355 /// Check whether the data dependence could prevent store-load
356 /// forwarding.
357 ///
358 /// \return false if we shouldn't vectorize at all or avoid larger
359 /// vectorization factors by limiting MinDepDistBytes.
360 bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
361
362 /// Updates the current safety status with \p S. We can go from Safe to
363 /// either PossiblySafeWithRtChecks or Unsafe and from
364 /// PossiblySafeWithRtChecks to Unsafe.
365 void mergeInStatus(VectorizationSafetyStatus S);
366
367 struct DepDistanceStrideAndSizeInfo {
368 const SCEV *Dist;
369
370 /// Strides could either be scaled (in bytes, taking the size of the
371 /// underlying type into account), or unscaled (in indexing units; unscaled
372 /// stride = scaled stride / size of underlying type). Here, strides are
373 /// unscaled.
374 uint64_t MaxStride;
375 std::optional<uint64_t> CommonStride;
376
377 bool ShouldRetryWithRuntimeCheck;
378 uint64_t TypeByteSize;
379 bool AIsWrite;
380 bool BIsWrite;
381
382 DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t MaxStride,
383 std::optional<uint64_t> CommonStride,
384 bool ShouldRetryWithRuntimeCheck,
385 uint64_t TypeByteSize, bool AIsWrite,
386 bool BIsWrite)
387 : Dist(Dist), MaxStride(MaxStride), CommonStride(CommonStride),
388 ShouldRetryWithRuntimeCheck(ShouldRetryWithRuntimeCheck),
389 TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
390 };
391
392 /// Get the dependence distance, strides, type size and whether it is a write
393 /// for the dependence between A and B. Returns a DepType, if we can prove
394 /// there's no dependence or the analysis fails. Outlined to lambda to limit
395 /// the scope of various temporary variables, like A/BPtr, StrideA/BPtr and
396 /// others. Returns either the dependence result, if it could already be
397 /// determined, or a struct containing (Distance, Stride, TypeSize, AIsWrite,
398 /// BIsWrite).
399 std::variant<Dependence::DepType, DepDistanceStrideAndSizeInfo>
400 getDependenceDistanceStrideAndSize(const MemAccessInfo &A, Instruction *AInst,
401 const MemAccessInfo &B,
402 Instruction *BInst);
403};
404
405class RuntimePointerChecking;
406/// A grouping of pointers. A single memcheck is required between
407/// two groups.
409 /// Create a new pointer checking group containing a single
410 /// pointer, with index \p Index in RtCheck.
412 const RuntimePointerChecking &RtCheck);
413
414 /// Tries to add the pointer recorded in RtCheck at index
415 /// \p Index to this pointer checking group. We can only add a pointer
416 /// to a checking group if we will still be able to get
417 /// the upper and lower bounds of the check. Returns true in case
418 /// of success, false otherwise.
419 bool addPointer(unsigned Index, const RuntimePointerChecking &RtCheck);
420 bool addPointer(unsigned Index, const SCEV *Start, const SCEV *End,
421 unsigned AS, bool NeedsFreeze, ScalarEvolution &SE);
422
423 /// The SCEV expression which represents the upper bound of all the
424 /// pointers in this group.
425 const SCEV *High;
426 /// The SCEV expression which represents the lower bound of all the
427 /// pointers in this group.
428 const SCEV *Low;
429 /// Indices of all the pointers that constitute this grouping.
431 /// Address space of the involved pointers.
432 unsigned AddressSpace;
433 /// Whether the pointer needs to be frozen after expansion, e.g. because it
434 /// may be poison outside the loop.
435 bool NeedsFreeze = false;
436};
437
438/// A memcheck made up of a pair of grouped pointers.
439typedef std::pair<const RuntimeCheckingPtrGroup *,
442
446 unsigned AccessSize;
448
450 unsigned AccessSize, bool NeedsFreeze)
453};
454
455/// Holds information about the memory runtime legality checks to verify
456/// that a group of pointers do not overlap.
459
460public:
461 struct PointerInfo {
462 /// Holds the pointer value that we need to check.
464 /// Holds the smallest byte address accessed by the pointer throughout all
465 /// iterations of the loop.
466 const SCEV *Start;
467 /// Holds the largest byte address accessed by the pointer throughout all
468 /// iterations of the loop, plus 1.
469 const SCEV *End;
470 /// Holds the information if this pointer is used for writing to memory.
472 /// Holds the id of the set of pointers that could be dependent because of a
473 /// shared underlying object.
475 /// Holds the id of the disjoint alias set to which this pointer belongs.
476 unsigned AliasSetId;
477 /// SCEV for the access.
478 const SCEV *Expr;
479 /// True if the pointer expressions needs to be frozen after expansion.
481
483 bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId,
484 const SCEV *Expr, bool NeedsFreeze)
488 };
489
491 : DC(DC), SE(SE) {}
492
493 /// Reset the state of the pointer runtime information.
494 void reset() {
495 Need = false;
496 CanUseDiffCheck = true;
497 Pointers.clear();
498 Checks.clear();
499 DiffChecks.clear();
500 CheckingGroups.clear();
501 }
502
503 /// Insert a pointer and calculate the start and end SCEVs.
504 /// We need \p PSE in order to compute the SCEV expression of the pointer
505 /// according to the assumptions that we've made during the analysis.
506 /// The method might also version the pointer stride according to \p Strides,
507 /// and add new predicates to \p PSE.
508 void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy,
509 bool WritePtr, unsigned DepSetId, unsigned ASId,
510 PredicatedScalarEvolution &PSE, bool NeedsFreeze);
511
512 /// No run-time memory checking is necessary.
513 bool empty() const { return Pointers.empty(); }
514
515 /// Generate the checks and store it. This also performs the grouping
516 /// of pointers to reduce the number of memchecks necessary.
517 void generateChecks(MemoryDepChecker::DepCandidates &DepCands,
518 bool UseDependencies);
519
520 /// Returns the checks that generateChecks created. They can be used to ensure
521 /// no read/write accesses overlap across all loop iterations.
523 return Checks;
524 }
525
526 // Returns an optional list of (pointer-difference expressions, access size)
527 // pairs that can be used to prove that there are no vectorization-preventing
528 // dependencies at runtime. There is a vectorization-preventing dependency
529 // if any pointer-difference is <u VF * InterleaveCount * access size. Returns
530 // std::nullopt if pointer-difference checks cannot be used.
531 std::optional<ArrayRef<PointerDiffInfo>> getDiffChecks() const {
532 if (!CanUseDiffCheck)
533 return std::nullopt;
534 return {DiffChecks};
535 }
536
537 /// Decide if we need to add a check between two groups of pointers,
538 /// according to needsChecking.
540 const RuntimeCheckingPtrGroup &N) const;
541
542 /// Returns the number of run-time checks required according to
543 /// needsChecking.
544 unsigned getNumberOfChecks() const { return Checks.size(); }
545
546 /// Print the list of run-time memory checks necessary.
547 void print(raw_ostream &OS, unsigned Depth = 0) const;
548
549 /// Print \p Checks.
552 unsigned Depth = 0) const;
553
554 /// This flag indicates if we need to add the runtime check.
555 bool Need = false;
556
557 /// Information about the pointers that may require checking.
559
560 /// Holds a partitioning of pointers into "check groups".
562
563 /// Check if pointers are in the same partition
564 ///
565 /// \p PtrToPartition contains the partition number for pointers (-1 if the
566 /// pointer belongs to multiple partitions).
567 static bool
569 unsigned PtrIdx1, unsigned PtrIdx2);
570
571 /// Decide whether we need to issue a run-time check for pointer at
572 /// index \p I and \p J to prove their independence.
573 bool needsChecking(unsigned I, unsigned J) const;
574
575 /// Return PointerInfo for pointer at index \p PtrIdx.
576 const PointerInfo &getPointerInfo(unsigned PtrIdx) const {
577 return Pointers[PtrIdx];
578 }
579
580 ScalarEvolution *getSE() const { return SE; }
581
582private:
583 /// Groups pointers such that a single memcheck is required
584 /// between two different groups. This will clear the CheckingGroups vector
585 /// and re-compute it. We will only group dependencies if \p UseDependencies
586 /// is true, otherwise we will create a separate group for each pointer.
587 void groupChecks(MemoryDepChecker::DepCandidates &DepCands,
588 bool UseDependencies);
589
590 /// Generate the checks and return them.
592
593 /// Try to add a new (pointer-difference, access size) pair to
594 /// DiffCheck for checking groups \p CGI and \p CGJ. If pointer-difference
595 /// checks cannot be used for the groups, set CanUseDiffCheck to false.
596 bool tryToCreateDiffCheck(const RuntimeCheckingPtrGroup &CGI,
597 const RuntimeCheckingPtrGroup &CGJ);
598
600
601 /// Holds a pointer to the ScalarEvolution analysis.
602 ScalarEvolution *SE;
603
604 /// Set of run-time checks required to establish independence of
605 /// otherwise may-aliasing pointers in the loop.
607
608 /// Flag indicating if pointer-difference checks can be used
609 bool CanUseDiffCheck = true;
610
611 /// A list of (pointer-difference, access size) pairs that can be used to
612 /// prove that there are no vectorization-preventing dependencies.
614};
615
616/// Drive the analysis of memory accesses in the loop
617///
618/// This class is responsible for analyzing the memory accesses of a loop. It
619/// collects the accesses and then its main helper the AccessAnalysis class
620/// finds and categorizes the dependences in buildDependenceSets.
621///
622/// For memory dependences that can be analyzed at compile time, it determines
623/// whether the dependence is part of cycle inhibiting vectorization. This work
624/// is delegated to the MemoryDepChecker class.
625///
626/// For memory dependences that cannot be determined at compile time, it
627/// generates run-time checks to prove independence. This is done by
628/// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the
629/// RuntimePointerCheck class.
630///
631/// If pointers can wrap or can't be expressed as affine AddRec expressions by
632/// ScalarEvolution, we will generate run-time checks by emitting a
633/// SCEVUnionPredicate.
634///
635/// Checks for both memory dependences and the SCEV predicates contained in the
636/// PSE must be emitted in order for the results of this analysis to be valid.
638public:
640 const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT,
641 LoopInfo *LI);
642
643 /// Return true we can analyze the memory accesses in the loop and there are
644 /// no memory dependence cycles. Note that for dependences between loads &
645 /// stores with uniform addresses,
646 /// hasStoreStoreDependenceInvolvingLoopInvariantAddress and
647 /// hasLoadStoreDependenceInvolvingLoopInvariantAddress also need to be
648 /// checked.
649 bool canVectorizeMemory() const { return CanVecMem; }
650
651 /// Return true if there is a convergent operation in the loop. There may
652 /// still be reported runtime pointer checks that would be required, but it is
653 /// not legal to insert them.
654 bool hasConvergentOp() const { return HasConvergentOp; }
655
657 return PtrRtChecking.get();
658 }
659
660 /// Number of memchecks required to prove independence of otherwise
661 /// may-alias pointers.
662 unsigned getNumRuntimePointerChecks() const {
663 return PtrRtChecking->getNumberOfChecks();
664 }
665
666 /// Return true if the block BB needs to be predicated in order for the loop
667 /// to be vectorized.
668 static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
669 DominatorTree *DT);
670
671 /// Returns true if value \p V is loop invariant.
672 bool isInvariant(Value *V) const;
673
674 unsigned getNumStores() const { return NumStores; }
675 unsigned getNumLoads() const { return NumLoads;}
676
677 /// The diagnostics report generated for the analysis. E.g. why we
678 /// couldn't analyze the loop.
679 const OptimizationRemarkAnalysis *getReport() const { return Report.get(); }
680
681 /// the Memory Dependence Checker which can determine the
682 /// loop-independent and loop-carried dependences between memory accesses.
683 const MemoryDepChecker &getDepChecker() const { return *DepChecker; }
684
685 /// Return the list of instructions that use \p Ptr to read or write
686 /// memory.
688 bool isWrite) const {
689 return DepChecker->getInstructionsForAccess(Ptr, isWrite);
690 }
691
692 /// If an access has a symbolic stride, this maps the pointer value to
693 /// the stride symbol.
695 return SymbolicStrides;
696 }
697
698 /// Print the information about the memory accesses in the loop.
699 void print(raw_ostream &OS, unsigned Depth = 0) const;
700
701 /// Return true if the loop has memory dependence involving two stores to an
702 /// invariant address, else return false.
704 return HasStoreStoreDependenceInvolvingLoopInvariantAddress;
705 }
706
707 /// Return true if the loop has memory dependence involving a load and a store
708 /// to an invariant address, else return false.
710 return HasLoadStoreDependenceInvolvingLoopInvariantAddress;
711 }
712
713 /// Return the list of stores to invariant addresses.
715 return StoresToInvariantAddresses;
716 }
717
718 /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts
719 /// them to a more usable form. All SCEV expressions during the analysis
720 /// should be re-written (and therefore simplified) according to PSE.
721 /// A user of LoopAccessAnalysis will need to emit the runtime checks
722 /// associated with this predicate.
723 const PredicatedScalarEvolution &getPSE() const { return *PSE; }
724
725private:
726 /// Analyze the loop. Returns true if all memory access in the loop can be
727 /// vectorized.
728 bool analyzeLoop(AAResults *AA, const LoopInfo *LI,
729 const TargetLibraryInfo *TLI, DominatorTree *DT);
730
731 /// Check if the structure of the loop allows it to be analyzed by this
732 /// pass.
733 bool canAnalyzeLoop();
734
735 /// Save the analysis remark.
736 ///
737 /// LAA does not directly emit remarks. Instead it stores them so the
738 /// client can retrieve and present them as its own analysis
739 /// (e.g. -Rpass-analysis=loop-vectorize).
741 recordAnalysis(StringRef RemarkName, const Instruction *Instr = nullptr);
742
743 /// Collect memory access with loop invariant strides.
744 ///
745 /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop
746 /// invariant.
747 void collectStridedAccess(Value *LoadOrStoreInst);
748
749 // Emits the first unsafe memory dependence in a loop.
750 // Emits nothing if there are no unsafe dependences
751 // or if the dependences were not recorded.
752 void emitUnsafeDependenceRemark();
753
754 std::unique_ptr<PredicatedScalarEvolution> PSE;
755
756 /// We need to check that all of the pointers in this list are disjoint
757 /// at runtime. Using std::unique_ptr to make using move ctor simpler.
758 std::unique_ptr<RuntimePointerChecking> PtrRtChecking;
759
760 /// the Memory Dependence Checker which can determine the
761 /// loop-independent and loop-carried dependences between memory accesses.
762 std::unique_ptr<MemoryDepChecker> DepChecker;
763
764 Loop *TheLoop;
765
766 unsigned NumLoads = 0;
767 unsigned NumStores = 0;
768
769 /// Cache the result of analyzeLoop.
770 bool CanVecMem = false;
771 bool HasConvergentOp = false;
772
773 /// Indicator that there are two non vectorizable stores to the same uniform
774 /// address.
775 bool HasStoreStoreDependenceInvolvingLoopInvariantAddress = false;
776 /// Indicator that there is non vectorizable load and store to the same
777 /// uniform address.
778 bool HasLoadStoreDependenceInvolvingLoopInvariantAddress = false;
779
780 /// List of stores to invariant addresses.
781 SmallVector<StoreInst *> StoresToInvariantAddresses;
782
783 /// The diagnostics report generated for the analysis. E.g. why we
784 /// couldn't analyze the loop.
785 std::unique_ptr<OptimizationRemarkAnalysis> Report;
786
787 /// If an access has a symbolic stride, this maps the pointer value to
788 /// the stride symbol.
789 DenseMap<Value *, const SCEV *> SymbolicStrides;
790};
791
792/// Return the SCEV corresponding to a pointer with the symbolic stride
793/// replaced with constant one, assuming the SCEV predicate associated with
794/// \p PSE is true.
795///
796/// If necessary this method will version the stride of the pointer according
797/// to \p PtrToStride and therefore add further predicates to \p PSE.
798///
799/// \p PtrToStride provides the mapping between the pointer value and its
800/// stride as collected by LoopVectorizationLegality::collectStridedAccess.
801const SCEV *
802replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
803 const DenseMap<Value *, const SCEV *> &PtrToStride,
804 Value *Ptr);
805
806/// If the pointer has a constant stride return it in units of the access type
807/// size. If the pointer is loop-invariant, return 0. Otherwise return
808/// std::nullopt.
809///
810/// Ensure that it does not wrap in the address space, assuming the predicate
811/// associated with \p PSE is true.
812///
813/// If necessary this method will version the stride of the pointer according
814/// to \p PtrToStride and therefore add further predicates to \p PSE.
815/// The \p Assume parameter indicates if we are allowed to make additional
816/// run-time assumptions.
817///
818/// Note that the analysis results are defined if-and-only-if the original
819/// memory access was defined. If that access was dead, or UB, then the
820/// result of this function is undefined.
821std::optional<int64_t>
822getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
823 const Loop *Lp,
824 const DenseMap<Value *, const SCEV *> &StridesMap = DenseMap<Value *, const SCEV *>(),
825 bool Assume = false, bool ShouldCheckWrap = true);
826
827/// Returns the distance between the pointers \p PtrA and \p PtrB iff they are
828/// compatible and it is possible to calculate the distance between them. This
829/// is a simple API that does not depend on the analysis pass.
830/// \param StrictCheck Ensure that the calculated distance matches the
831/// type-based one after all the bitcasts removal in the provided pointers.
832std::optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
833 Value *PtrB, const DataLayout &DL,
834 ScalarEvolution &SE,
835 bool StrictCheck = false,
836 bool CheckType = true);
837
838/// Attempt to sort the pointers in \p VL and return the sorted indices
839/// in \p SortedIndices, if reordering is required.
840///
841/// Returns 'true' if sorting is legal, otherwise returns 'false'.
842///
843/// For example, for a given \p VL of memory accesses in program order, a[i+4],
844/// a[i+0], a[i+1] and a[i+7], this function will sort the \p VL and save the
845/// sorted indices in \p SortedIndices as a[i+0], a[i+1], a[i+4], a[i+7] and
846/// saves the mask for actual memory accesses in program order in
847/// \p SortedIndices as <1,2,0,3>
848bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
849 ScalarEvolution &SE,
850 SmallVectorImpl<unsigned> &SortedIndices);
851
852/// Returns true if the memory operations \p A and \p B are consecutive.
853/// This is a simple API that does not depend on the analysis pass.
854bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
855 ScalarEvolution &SE, bool CheckType = true);
856
857/// Calculate Start and End points of memory access.
858/// Let's assume A is the first access and B is a memory access on N-th loop
859/// iteration. Then B is calculated as:
860/// B = A + Step*N .
861/// Step value may be positive or negative.
862/// N is a calculated back-edge taken count:
863/// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
864/// Start and End points are calculated in the following way:
865/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
866/// where SizeOfElt is the size of single memory access in bytes.
867///
868/// There is no conflict when the intervals are disjoint:
869/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
870std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
871 const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
872 ScalarEvolution *SE,
873 DenseMap<std::pair<const SCEV *, Type *>,
874 std::pair<const SCEV *, const SCEV *>> *PointerBounds);
875
877 /// The cache.
879
880 // The used analysis passes.
881 ScalarEvolution &SE;
882 AAResults &AA;
883 DominatorTree &DT;
884 LoopInfo &LI;
886 const TargetLibraryInfo *TLI = nullptr;
887
888public:
891 const TargetLibraryInfo *TLI)
892 : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {}
893
894 const LoopAccessInfo &getInfo(Loop &L);
895
896 void clear();
897
898 bool invalidate(Function &F, const PreservedAnalyses &PA,
900};
901
902/// This analysis provides dependence information for the memory
903/// accesses of a loop.
904///
905/// It runs the analysis for a loop on demand. This can be initiated by
906/// querying the loop access info via AM.getResult<LoopAccessAnalysis>.
907/// getResult return a LoopAccessInfo object. See this class for the
908/// specifics of what information is provided.
910 : public AnalysisInfoMixin<LoopAccessAnalysis> {
912 static AnalysisKey Key;
913
914public:
916
918};
919
921 const MemoryDepChecker &DepChecker) const {
922 return DepChecker.getMemoryInstructions()[Source];
923}
924
926 const MemoryDepChecker &DepChecker) const {
927 return DepChecker.getMemoryInstructions()[Destination];
928}
929
930} // End llvm namespace
931
932#endif
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MapVector< const Value *, unsigned > OrderMap
Definition: AsmWriter.cpp:98
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
uint32_t Index
bool End
Definition: ELF_riscv.cpp:480
Generic implementation of equivalence classes through the use of Tarjan's efficient union-find algorithm...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
raw_pwrite_stream & OS
static LLVM_ATTRIBUTE_ALWAYS_INLINE bool CheckType(MVT::SimpleValueType VT, SDValue N, const TargetLowering *TLI, const DataLayout &DL)
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:292
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
An instruction for reading from memory.
Definition: Instructions.h:176
This analysis provides dependence information for the memory accesses of a loop.
LoopAccessInfoManager Result
Result run(Function &F, FunctionAnalysisManager &AM)
bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)
const LoopAccessInfo & getInfo(Loop &L)
LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT, LoopInfo &LI, TargetTransformInfo *TTI, const TargetLibraryInfo *TLI)
Drive the analysis of memory accesses in the loop.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
ArrayRef< StoreInst * > getStoresToInvariantAddresses() const
Return the list of stores to invariant addresses.
const OptimizationRemarkAnalysis * getReport() const
The diagnostics report generated for the analysis.
const RuntimePointerChecking * getRuntimePointerChecking() const
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles.
unsigned getNumLoads() const
unsigned getNumRuntimePointerChecks() const
Number of memchecks required to prove independence of otherwise may-alias pointers.
bool isInvariant(Value *V) const
Returns true if value V is loop invariant.
bool hasLoadStoreDependenceInvolvingLoopInvariantAddress() const
Return true if the loop has memory dependence involving a load and a store to an invariant address,...
void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
unsigned getNumStores() const
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Return the list of instructions that use Ptr to read or write memory.
const DenseMap< Value *, const SCEV * > & getSymbolicStrides() const
If an access has a symbolic strides, this maps the pointer value to the stride symbol.
bool hasStoreStoreDependenceInvolvingLoopInvariantAddress() const
Return true if the loop has memory dependence involving two stores to an invariant address,...
bool hasConvergentOp() const
Return true if there is a convergent operation in the loop.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
ArrayRef< unsigned > getOrderForAccess(Value *Ptr, bool IsWrite) const
Return the program order indices for the access location (Ptr, IsWrite).
bool isSafeForAnyVectorWidth() const
Return true if the number of elements that are safe to operate on simultaneously is not bounded.
bool areDepsSafe(const DepCandidates &AccessSets, const MemAccessInfoList &CheckDeps)
Check whether the dependencies between the accesses are safe.
EquivalenceClasses< MemAccessInfo > DepCandidates
Set of potential dependent memory accesses.
MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L, const DenseMap< Value *, const SCEV * > &SymbolicStrides, unsigned MaxTargetVectorWidthInBits)
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
const Loop * getInnermostLoop() const
uint64_t getMaxSafeVectorWidthInBits() const
Return the number of elements that are safe to operate on simultaneously, multiplied by the size of t...
bool isSafeForVectorization() const
No memory dependence was encountered that would inhibit vectorization.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
DenseMap< std::pair< const SCEV *, Type * >, std::pair< const SCEV *, const SCEV * > > & getPointerBounds()
SmallVector< MemAccessInfo, 8 > MemAccessInfoList
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
VectorizationSafetyStatus
Type to keep track of the status of the dependence check.
bool shouldRetryWithRuntimeCheck() const
In some cases when the dependency check fails we can still vectorize the loop with a dynamic array ac...
void addAccess(StoreInst *SI)
Register the location (instructions are given increasing numbers) of a write access.
PointerIntPair< Value *, 1, bool > MemAccessInfo
DenseMap< Instruction *, unsigned > generateInstructionOrderMap() const
Generate a mapping between the memory instructions and their indices according to program order.
Diagnostic information for optimization analysis remarks.
PointerIntPair - This class implements a pair of a pointer and small integer.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
bool Need
This flag indicates if we need to add the runtime check.
void reset()
Reset the state of the pointer runtime information.
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE)
void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
void print(raw_ostream &OS, unsigned Depth=0) const
Print the list of run-time memory checks necessary.
std::optional< ArrayRef< PointerDiffInfo > > getDiffChecks() const
SmallVector< RuntimeCheckingPtrGroup, 2 > CheckingGroups
Holds a partitioning of pointers into "check groups".
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
bool empty() const
No run-time memory checking is necessary.
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
ScalarEvolution * getSE() const
void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy, bool WritePtr, unsigned DepSetId, unsigned ASId, PredicatedScalarEvolution &PSE, bool NeedsFreeze)
Insert a pointer and calculate the start and end SCEVs.
const SmallVectorImpl< RuntimePointerCheck > & getChecks() const
Returns the checks that generateChecks created.
const PointerInfo & getPointerInfo(unsigned PtrIdx) const
Return PointerInfo for pointer at index PtrIdx.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Value handle that tracks a Value across RAUW.
Definition: ValueHandle.h:331
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::optional< int > getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB, const DataLayout &DL, ScalarEvolution &SE, bool StrictCheck=false, bool CheckType=true)
Returns the distance between the pointers PtrA and PtrB iff they are compatible and it is possible to...
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool sortPtrAccesses(ArrayRef< Value * > VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
Attempt to sort the pointers in VL and return the sorted indices in SortedIndices,...
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
std::pair< const SCEV *, const SCEV * > getStartAndEndForAccess(const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount, ScalarEvolution *SE, DenseMap< std::pair< const SCEV *, Type * >, std::pair< const SCEV *, const SCEV * > > *PointerBounds)
Calculate Start and End points of memory access.
#define N
IR Values for the lower and upper bounds of a pointer evolution.
Definition: LoopUtils.cpp:1858
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition: PassManager.h:92
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:28
Dependence between memory access instructions.
Instruction * getDestination(const MemoryDepChecker &DepChecker) const
Return the destination instruction of the dependence.
DepType Type
The type of the dependence.
unsigned Destination
Index of the destination of the dependence in the InstMap vector.
Dependence(unsigned Source, unsigned Destination, DepType Type)
bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
Instruction * getSource(const MemoryDepChecker &DepChecker) const
Return the source instruction of the dependence.
bool isForward() const
Lexically forward dependence.
bool isBackward() const
Lexically backward dependence.
void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction * > &Instrs) const
Print the dependence.
unsigned Source
Index of the source of the dependence in the InstMap vector.
DepType
The type of the dependence.
static const char * DepName[]
String version of the types.
PointerDiffInfo(const SCEV *SrcStart, const SCEV *SinkStart, unsigned AccessSize, bool NeedsFreeze)
unsigned AddressSpace
Address space of the involved pointers.
bool addPointer(unsigned Index, const RuntimePointerChecking &RtCheck)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group.
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
PointerInfo(Value *PointerValue, const SCEV *Start, const SCEV *End, bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId, const SCEV *Expr, bool NeedsFreeze)
const SCEV * Start
Holds the smallest byte address accessed by the pointer throughout all iterations of the loop.
const SCEV * Expr
SCEV for the access.
bool NeedsFreeze
True if the pointer expressions needs to be frozen after expansion.
bool IsWritePtr
Holds the information if this pointer is used for writing to memory.
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object.
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
const SCEV * End
Holds the largest byte address accessed by the pointer throughout all iterations of the loop,...
TrackingVH< Value > PointerValue
Holds the pointer value that we need to check.
Collection of parameters shared between the Loop Vectorizer and the Loop Access Analysis.
static const unsigned MaxVectorWidth
Maximum SIMD width.
static unsigned VectorizationFactor
VF as overridden by the user.
static unsigned RuntimeMemoryCheckThreshold
When performing memory disambiguation checks at runtime do not make more than this number of compari...
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.