MemoryDependenceAnalysis.cpp
1//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements an analysis that determines, for a given memory
10// operation, what preceding memory operations it depends on. It builds on
11// alias analysis information, and tries to provide a lazy, caching interface to
12// a common kind of alias information query.
13//
14//===----------------------------------------------------------------------===//
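//
// A minimal usage sketch (illustrative only, not part of this file): under the
// new pass manager, a client pass obtains MemoryDependenceResults from the
// analysis manager and queries it per instruction. 'ExamplePass' and the loop
// body below are hypothetical.
//
//   PreservedAnalyses ExamplePass::run(Function &F, FunctionAnalysisManager &AM) {
//     MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
//     for (Instruction &I : instructions(F))       // llvm/IR/InstIterator.h
//       if (auto *LI = dyn_cast<LoadInst>(&I)) {
//         MemDepResult Dep = MD.getDependency(LI); // local (same-block) query
//         if (Dep.isDef())
//           (void)Dep.getInst();                   // the defining memory op
//       }
//     return PreservedAnalyses::all();
//   }
//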
15
16#include "llvm/Analysis/MemoryDependenceAnalysis.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallPtrSet.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/AliasAnalysis.h"
23#include "llvm/Analysis/AssumptionCache.h"
24#include "llvm/Analysis/MemoryBuiltins.h"
25#include "llvm/Analysis/MemoryLocation.h"
26#include "llvm/Analysis/PHITransAddr.h"
27#include "llvm/Analysis/TargetLibraryInfo.h"
28#include "llvm/Analysis/ValueTracking.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/InstrTypes.h"
33#include "llvm/IR/Instruction.h"
34#include "llvm/IR/Instructions.h"
35#include "llvm/IR/IntrinsicInst.h"
36#include "llvm/IR/LLVMContext.h"
37#include "llvm/IR/Metadata.h"
38#include "llvm/IR/Module.h"
39#include "llvm/IR/PredIteratorCache.h"
40#include "llvm/IR/Type.h"
41#include "llvm/IR/Use.h"
42#include "llvm/IR/Value.h"
43#include "llvm/InitializePasses.h"
44#include "llvm/Pass.h"
45#include "llvm/Support/AtomicOrdering.h"
46#include "llvm/Support/Casting.h"
47#include "llvm/Support/CommandLine.h"
48#include "llvm/Support/Compiler.h"
49#include "llvm/Support/Debug.h"
50#include <algorithm>
51#include <cassert>
52#include <iterator>
53#include <utility>
54
55using namespace llvm;
56
57#define DEBUG_TYPE "memdep"
58
59STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
60STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
61STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
62
63STATISTIC(NumCacheNonLocalPtr,
64 "Number of fully cached non-local ptr responses");
65STATISTIC(NumCacheDirtyNonLocalPtr,
66 "Number of cached, but dirty, non-local ptr responses");
67STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
68STATISTIC(NumCacheCompleteNonLocalPtr,
69 "Number of block queries that were completely cached");
70
71// Limit for the number of instructions to scan in a block.
72
73static cl::opt<unsigned> BlockScanLimit(
74 "memdep-block-scan-limit", cl::Hidden, cl::init(100),
75 cl::desc("The number of instructions to scan in a block in memory "
76 "dependency analysis (default = 100)"));
77
78static cl::opt<unsigned>
79 BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(200),
80 cl::desc("The number of blocks to scan during memory "
81 "dependency analysis (default = 200)"));
82
83// Limit on the number of memdep results to process.
84static const unsigned int NumResultsLimit = 100;
85
86/// This is a helper function that removes Val from 'Inst's set in ReverseMap.
87///
88/// If the set becomes empty, remove Inst's entry.
89template <typename KeyTy>
90static void
91RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
92 Instruction *Inst, KeyTy Val) {
93 typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
94 ReverseMap.find(Inst);
95 assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
96 bool Found = InstIt->second.erase(Val);
97 assert(Found && "Invalid reverse map!");
98 (void)Found;
99 if (InstIt->second.empty())
100 ReverseMap.erase(InstIt);
101}
102
103/// If the given instruction references a specific memory location, fill in Loc
104/// with the details, otherwise set Loc.Ptr to null.
105///
106/// Returns a ModRefInfo value describing the general behavior of the
107/// instruction.
108static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
109 const TargetLibraryInfo &TLI) {
110 if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
111 if (LI->isUnordered()) {
112 Loc = MemoryLocation::get(LI);
113 return ModRefInfo::Ref;
114 }
115 if (LI->getOrdering() == AtomicOrdering::Monotonic) {
116 Loc = MemoryLocation::get(LI);
117 return ModRefInfo::ModRef;
118 }
119 Loc = MemoryLocation();
120 return ModRefInfo::ModRef;
121 }
122
123 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
124 if (SI->isUnordered()) {
125 Loc = MemoryLocation::get(SI);
126 return ModRefInfo::Mod;
127 }
128 if (SI->getOrdering() == AtomicOrdering::Monotonic) {
129 Loc = MemoryLocation::get(SI);
130 return ModRefInfo::ModRef;
131 }
132 Loc = MemoryLocation();
133 return ModRefInfo::ModRef;
134 }
135
136 if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
137 Loc = MemoryLocation::get(V);
138 return ModRefInfo::ModRef;
139 }
140
141 if (const CallBase *CB = dyn_cast<CallBase>(Inst)) {
142 if (Value *FreedOp = getFreedOperand(CB, &TLI)) {
143 // calls to free() deallocate the entire structure
144 Loc = MemoryLocation::getAfter(FreedOp);
145 return ModRefInfo::Mod;
146 }
147 }
148
149 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
150 switch (II->getIntrinsicID()) {
151 case Intrinsic::lifetime_start:
152 case Intrinsic::lifetime_end:
153 Loc = MemoryLocation::getForArgument(II, 0, TLI);
154 // These intrinsics don't really modify the memory, but returning Mod
155 // will allow them to be handled conservatively.
156 return ModRefInfo::Mod;
157 case Intrinsic::invariant_start:
158 Loc = MemoryLocation::getForArgument(II, 1, TLI);
159 // These intrinsics don't really modify the memory, but returning Mod
160 // will allow them to be handled conservatively.
161 return ModRefInfo::Mod;
162 case Intrinsic::invariant_end:
163 Loc = MemoryLocation::getForArgument(II, 2, TLI);
164 // These intrinsics don't really modify the memory, but returning Mod
165 // will allow them to be handled conservatively.
166 return ModRefInfo::Mod;
167 case Intrinsic::masked_load:
168 Loc = MemoryLocation::getForArgument(II, 0, TLI);
169 return ModRefInfo::Ref;
170 case Intrinsic::masked_store:
171 Loc = MemoryLocation::getForArgument(II, 1, TLI);
172 return ModRefInfo::Mod;
173 default:
174 break;
175 }
176 }
177
178 // Otherwise, just do the coarse-grained thing that always works.
179 if (Inst->mayWriteToMemory())
180 return ModRefInfo::ModRef;
181 if (Inst->mayReadFromMemory())
182 return ModRefInfo::Ref;
183 return ModRefInfo::NoModRef;
184}
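// For illustration (sketch, not part of the original source): an unordered
// load such as
//   %v = load i32, ptr %p
// gets a precise location for %p and ModRefInfo::Ref; a monotonic atomic load
// or store still gets the precise location but ModRefInfo::ModRef; anything
// volatile or more strongly ordered gets an empty location together with
// ModRef.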
185
186/// Private helper for finding the local dependencies of a call site.
187MemDepResult MemoryDependenceResults::getCallDependencyFrom(
188 CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
189 BasicBlock *BB) {
190 unsigned Limit = getDefaultBlockScanLimit();
191
192 // Walk backwards through the block, looking for dependencies.
193 while (ScanIt != BB->begin()) {
194 Instruction *Inst = &*--ScanIt;
195
196 // Limit the amount of scanning we do so we don't end up with quadratic
197 // running time on extreme testcases.
198 --Limit;
199 if (!Limit)
200 return MemDepResult::getUnknown();
201
202 // If this inst is a memory op, get the pointer it accessed
203 MemoryLocation Loc;
204 ModRefInfo MR = GetLocation(Inst, Loc, TLI);
205 if (Loc.Ptr) {
206 // A simple instruction.
207 if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
208 return MemDepResult::getClobber(Inst);
209 continue;
210 }
211
212 if (auto *CallB = dyn_cast<CallBase>(Inst)) {
213 // If these two calls do not interfere, look past it.
214 if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
215 // If the two calls are the same, return Inst as a Def, so that
216 // Call can be found redundant and eliminated.
217 if (isReadOnlyCall && !isModSet(MR) &&
218 Call->isIdenticalToWhenDefined(CallB))
219 return MemDepResult::getDef(Inst);
220
221 // Otherwise if the two calls don't interact (e.g. CallB is readnone)
222 // keep scanning.
223 continue;
224 } else
225 return MemDepResult::getClobber(Inst);
226 }
227
228 // If we could not obtain a pointer for the instruction and the instruction
229 // touches memory then assume that this is a dependency.
230 if (isModOrRefSet(MR))
231 return MemDepResult::getClobber(Inst);
232 }
233
234 // No dependence found. If this is the entry block of the function, it is
235 // unknown, otherwise it is non-local.
236 if (BB != &BB->getParent()->getEntryBlock())
237 return MemDepResult::getNonLocal();
238 return MemDepResult::getNonFuncLocal();
239}
240
241MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
242 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
243 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
244 BatchAAResults &BatchAA) {
245 MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
246 if (QueryInst != nullptr) {
247 if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
248 InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
249
250 if (InvariantGroupDependency.isDef())
251 return InvariantGroupDependency;
252 }
253 }
254 MemDepResult SimpleDep = getSimplePointerDependencyFrom(
255 MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, BatchAA);
256 if (SimpleDep.isDef())
257 return SimpleDep;
258 // A non-local invariant group dependency indicates that there is a non-local
259 // Def (it only returns NonLocal if it finds a non-local Def), which is better
260 // than a local clobber and everything else.
261 if (InvariantGroupDependency.isNonLocal())
262 return InvariantGroupDependency;
263
264 assert(InvariantGroupDependency.isUnknown() &&
265 "InvariantGroupDependency should be only unknown at this point");
266 return SimpleDep;
267}
268
269MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
270 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
271 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
272 BatchAAResults BatchAA(AA, &EEA);
273 return getPointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, Limit,
274 BatchAA);
275}
276
277MemDepResult
278MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
279 BasicBlock *BB) {
280
281 if (!LI->hasMetadata(LLVMContext::MD_invariant_group))
282 return MemDepResult::getUnknown();
283
284 // Take the pointer operand after stripping all casts and zero GEPs. This
285 // way we only need to search the cast graph downwards.
286 Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
287
288 // It is not safe to walk the use list of a global value, because function
289 // passes aren't allowed to look outside their functions.
290 // FIXME: this could be fixed by filtering instructions from outside
291 // the current function.
292 if (isa<GlobalValue>(LoadOperand))
293 return MemDepResult::getUnknown();
294
295 Instruction *ClosestDependency = nullptr;
296 // The order of instructions in the use list is unpredictable. In order to
297 // always get the same result, we look for the closest dominating candidate.
298 auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
299 assert(Other && "Must call it with not null instruction");
300 if (Best == nullptr || DT.dominates(Best, Other))
301 return Other;
302 return Best;
303 };
304
305 for (const Use &Us : LoadOperand->uses()) {
306 auto *U = dyn_cast<Instruction>(Us.getUser());
307 if (!U || U == LI || !DT.dominates(U, LI))
308 continue;
309
310 // If we hit a load/store with the same invariant.group metadata (and the
311 // same pointer operand) we can assume that the value pointed to by the
312 // pointer operand didn't change.
313 if ((isa<LoadInst>(U) ||
314 (isa<StoreInst>(U) &&
315 cast<StoreInst>(U)->getPointerOperand() == LoadOperand)) &&
316 U->hasMetadata(LLVMContext::MD_invariant_group))
317 ClosestDependency = GetClosestDependency(ClosestDependency, U);
318 }
319
320 if (!ClosestDependency)
321 return MemDepResult::getUnknown();
322 if (ClosestDependency->getParent() == BB)
323 return MemDepResult::getDef(ClosestDependency);
324 // Def(U) can't be returned here because it is non-local. If no local
325 // dependency is found, we return NonLocal, counting on the caller to
326 // invoke getNonLocalPointerDependency, which will return the cached
327 // result.
328 NonLocalDefsCache.try_emplace(
329 LI, NonLocalDepResult(ClosestDependency->getParent(),
330 MemDepResult::getDef(ClosestDependency), nullptr));
331 ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
332 return MemDepResult::getNonLocal();
333}
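// Illustrative IR for the invariant.group walk above (sketch only, not part
// of the original source):
//
//   store i32 42, ptr %p, !invariant.group !0
//   ...
//   %v = load i32, ptr %p, !invariant.group !0
//
// The store dominates the load, shares its pointer operand, and carries the
// same metadata, so it is reported as a Def: locally if it is in the same
// block, otherwise through NonLocalDefsCache.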
334
335// Check if SI that may alias with MemLoc can be safely skipped. This is
336// possible if SI can only must-alias or no-alias MemLoc (no partial
337// overlap possible) and it writes the same value that MemLoc contains
338// now (the value was loaded before this store and was not modified in
339// between).
340static bool canSkipClobberingStore(const StoreInst *SI,
341 const MemoryLocation &MemLoc,
342 Align MemLocAlign, BatchAAResults &BatchAA,
343 unsigned ScanLimit) {
344 if (!MemLoc.Size.hasValue())
345 return false;
346 if (MemoryLocation::get(SI).Size != MemLoc.Size)
347 return false;
348 if (MemLoc.Size.isScalable())
349 return false;
350 if (std::min(MemLocAlign, SI->getAlign()).value() <
351 MemLoc.Size.getValue().getKnownMinValue())
352 return false;
353
354 auto *LI = dyn_cast<LoadInst>(SI->getValueOperand());
355 if (!LI || LI->getParent() != SI->getParent())
356 return false;
357 if (BatchAA.alias(MemoryLocation::get(LI), MemLoc) != AliasResult::MustAlias)
358 return false;
359 unsigned NumVisitedInsts = 0;
360 for (const Instruction *I = LI; I != SI; I = I->getNextNode())
361 if (++NumVisitedInsts > ScanLimit ||
362 isModSet(BatchAA.getModRefInfo(I, MemLoc)))
363 return false;
364
365 return true;
366}
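// Illustrative pattern accepted by canSkipClobberingStore (sketch only, not
// part of the original source):
//
//   %v = load i32, ptr %q        ; must-aliases MemLoc
//   ...                          ; no intervening writes to MemLoc
//   store i32 %v, ptr %q         ; writes back the value MemLoc already holds
//
// Such a store cannot change the contents of MemLoc, so the backwards scan
// below may step over it instead of reporting a clobber.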
367
368MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
369 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
370 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
371 BatchAAResults &BatchAA) {
372 bool isInvariantLoad = false;
373 Align MemLocAlign =
374 MemLoc.Ptr->getPointerAlignment(BB->getDataLayout());
375
376 unsigned DefaultLimit = getDefaultBlockScanLimit();
377 if (!Limit)
378 Limit = &DefaultLimit;
379
380 // We must be careful with atomic accesses, as they may allow another thread
381 // to touch this location, clobbering it. We are conservative: if the
382 // QueryInst is not a simple (non-atomic) memory access, we automatically
383 // return getClobber.
384 // If it is simple, we know based on the results of
385 // "Compiler testing via a theory of sound optimisations in the C11/C++11
386 // memory model" in PLDI 2013, that a non-atomic location can only be
387 // clobbered between a pair of a release and an acquire action, with no
388 // access to the location in between.
389 // Here is an example giving the general intuition behind this rule.
390 // In the following code:
391 // store x 0;
392 // release action; [1]
393 // acquire action; [4]
394 // %val = load x;
395 // It is unsafe to replace %val by 0 because another thread may be running:
396 // acquire action; [2]
397 // store x 42;
398 // release action; [3]
399 // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
400 // being 42. A key property of this program, however, is that if either
401 // 1 or 4 were missing, there would be a race between the store of 42 and
402 // either the store of 0 or the load (making the whole program racy).
403 // The paper mentioned above shows that the same property is respected
404 // by every program that can detect any optimization of that kind: either
405 // it is racy (undefined) or there is a release followed by an acquire
406 // between the pair of accesses under consideration.
407
408 // If the load is invariant, we "know" that it doesn't alias *any* write. We
409 // do want to respect mustalias results since defs are useful for value
410 // forwarding, but any mayalias write can be assumed to be noalias.
411 // Arguably, this logic should be pushed inside AliasAnalysis itself.
412 if (isLoad && QueryInst)
413 if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
414 if (LI->hasMetadata(LLVMContext::MD_invariant_load))
415 isInvariantLoad = true;
416 MemLocAlign = LI->getAlign();
417 }
418
419 // Returns true for a volatile instruction.
420 // For a load or store, returns true if its atomic ordering is stronger than
421 // AO; for other instructions, returns true if they may read or write memory.
422 auto isComplexForReordering = [](Instruction * I, AtomicOrdering AO)->bool {
423 if (I->isVolatile())
424 return true;
425 if (auto *LI = dyn_cast<LoadInst>(I))
426 return isStrongerThan(LI->getOrdering(), AO);
427 if (auto *SI = dyn_cast<StoreInst>(I))
428 return isStrongerThan(SI->getOrdering(), AO);
429 return I->mayReadOrWriteMemory();
430 };
431
432 // Walk backwards through the basic block, looking for dependencies.
433 while (ScanIt != BB->begin()) {
434 Instruction *Inst = &*--ScanIt;
435
436 // Limit the amount of scanning we do so we don't end up with quadratic
437 // running time on extreme testcases.
438 --*Limit;
439 if (!*Limit)
440 return MemDepResult::getUnknown();
441
442 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
443 // If we reach a lifetime begin or end marker, then the query ends here
444 // because the value is undefined.
445 Intrinsic::ID ID = II->getIntrinsicID();
446 switch (ID) {
447 case Intrinsic::lifetime_start: {
448 MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(0));
449 if (BatchAA.isMustAlias(ArgLoc, MemLoc))
450 return MemDepResult::getDef(II);
451 continue;
452 }
453 case Intrinsic::masked_load:
454 case Intrinsic::masked_store: {
455 MemoryLocation Loc;
456 /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
457 AliasResult R = BatchAA.alias(Loc, MemLoc);
458 if (R == AliasResult::NoAlias)
459 continue;
460 if (R == AliasResult::MustAlias)
461 return MemDepResult::getDef(II);
462 if (ID == Intrinsic::masked_load)
463 continue;
464 return MemDepResult::getClobber(II);
465 }
466 }
467 }
468
469 // Values depend on loads if the pointers are must aliased. This means
470 // that a load depends on another must aliased load from the same value.
471 // One exception is atomic loads: a value can depend on an atomic load that
472 // it does not alias with when this atomic load indicates that another
473 // thread may be accessing the location.
474 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
475 // While volatile accesses cannot be eliminated, they do not have to clobber
476 // non-aliasing locations, as normal accesses, for example, can be safely
477 // reordered with volatile accesses.
478 if (LI->isVolatile()) {
479 if (!QueryInst)
480 // Original QueryInst *may* be volatile
481 return MemDepResult::getClobber(LI);
482 if (QueryInst->isVolatile())
483 // Ordering required if QueryInst is itself volatile
484 return MemDepResult::getClobber(LI);
485 // Otherwise, volatile doesn't imply any special ordering
486 }
487
488 // Atomic loads have complications involved.
489 // A Monotonic (or higher) load is OK if the query inst is itself not
490 // atomic.
491 // FIXME: This is overly conservative.
492 if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
493 if (!QueryInst ||
494 isComplexForReordering(QueryInst, AtomicOrdering::NotAtomic))
495 return MemDepResult::getClobber(LI);
496 if (LI->getOrdering() != AtomicOrdering::Monotonic)
497 return MemDepResult::getClobber(LI);
498 }
499
500 MemoryLocation LoadLoc = MemoryLocation::get(LI);
501
502 // If we found a pointer, check if it could be the same as our pointer.
503 AliasResult R = BatchAA.alias(LoadLoc, MemLoc);
504
505 if (R == AliasResult::NoAlias)
506 continue;
507
508 if (isLoad) {
509 // Must aliased loads are defs of each other.
510 if (R == AliasResult::MustAlias)
511 return MemDepResult::getDef(Inst);
512
513 // If we have a partial alias, then return this as a clobber for the
514 // client to handle.
515 if (R == AliasResult::PartialAlias && R.hasOffset()) {
516 ClobberOffsets[LI] = R.getOffset();
517 return MemDepResult::getClobber(Inst);
518 }
519
520 // Random may-alias loads don't depend on each other without a
521 // dependence.
522 continue;
523 }
524
525 // Stores don't alias loads from read-only memory.
526 if (!isModSet(BatchAA.getModRefInfoMask(LoadLoc)))
527 continue;
528
529 // Stores depend on may/must aliased loads.
530 return MemDepResult::getDef(Inst);
531 }
532
533 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
534 // Atomic stores have complications involved.
535 // A Monotonic store is OK if the query inst is itself not atomic.
536 // FIXME: This is overly conservative.
537 if (!SI->isUnordered() && SI->isAtomic()) {
538 if (!QueryInst ||
539 isComplexForReordering(QueryInst, AtomicOrdering::Unordered))
540 return MemDepResult::getClobber(SI);
541 // Ok, if we are here the guard above guarantees that
542 // QueryInst is a non-atomic or unordered load/store.
543 // SI is atomic with monotonic or release semantics (seq_cst for a store
544 // is effectively release semantics plus a total order over other seq_cst
545 // instructions; since QueryInst is not seq_cst, we can treat it as
546 // simple release semantics).
547 // Monotonic and release semantics allow reordering before the store,
548 // so we are safe to go further and check the aliasing. It will prohibit
549 // reordering in case the locations may- or must-alias.
550 }
551
552 // While volatile accesses cannot be eliminated, they do not have to clobber
553 // non-aliasing locations, as normal accesses can, for example, be reordered
554 // with volatile accesses.
555 if (SI->isVolatile())
556 if (!QueryInst || QueryInst->isVolatile())
557 return MemDepResult::getClobber(SI);
558
559 // If alias analysis can tell that this store is guaranteed to not modify
560 // the query pointer, ignore it. Use getModRefInfo to handle cases where
561 // the query pointer points to constant memory etc.
562 if (!isModOrRefSet(BatchAA.getModRefInfo(SI, MemLoc)))
563 continue;
564
565 // Ok, this store might clobber the query pointer. Check to see if it is
566 // a must alias: in this case, we want to return this as a def.
567 // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
568 MemoryLocation StoreLoc = MemoryLocation::get(SI);
569
570 // If we found a pointer, check if it could be the same as our pointer.
571 AliasResult R = BatchAA.alias(StoreLoc, MemLoc);
572
573 if (R == AliasResult::NoAlias)
574 continue;
575 if (R == AliasResult::MustAlias)
576 return MemDepResult::getDef(Inst);
577 if (isInvariantLoad)
578 continue;
579 if (canSkipClobberingStore(SI, MemLoc, MemLocAlign, BatchAA, *Limit))
580 continue;
581 return MemDepResult::getClobber(Inst);
582 }
583
584 // If this is an allocation, and if we know that the accessed pointer is to
585 // the allocation, return Def. This means that there is no dependence and
586 // the access can be optimized based on that. For example, a load could
587 // turn into undef. Note that we can bypass the allocation itself when
588 // looking for a clobber in many cases; that's an alias property and is
589 // handled by BasicAA.
590 if (isa<AllocaInst>(Inst) || isNoAliasCall(Inst)) {
591 const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr);
592 if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr))
593 return MemDepResult::getDef(Inst);
594 }
595
596 // If we found a select instruction for MemLoc pointer, return it as Def
597 // dependency.
598 if (isa<SelectInst>(Inst) && MemLoc.Ptr == Inst)
599 return MemDepResult::getDef(Inst);
600
601 if (isInvariantLoad)
602 continue;
603
604 // A release fence requires that all stores complete before it, but does
605 // not prevent the reordering of following loads or stores 'before' the
606 // fence. As a result, we look past it when finding a dependency for
607 // loads. DSE uses this to find preceding stores to delete and thus we
608 // can't bypass the fence if the query instruction is a store.
609 if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
610 if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
611 continue;
612
613 // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
614 switch (BatchAA.getModRefInfo(Inst, MemLoc)) {
615 case ModRefInfo::NoModRef:
616 // If the call has no effect on the queried pointer, just ignore it.
617 continue;
618 case ModRefInfo::Mod:
619 return MemDepResult::getClobber(Inst);
620 case ModRefInfo::Ref:
621 // If the call is known to never store to the pointer, and if this is a
622 // load query, we can safely ignore it (scan past it).
623 if (isLoad)
624 continue;
625 [[fallthrough]];
626 default:
627 // Otherwise, there is a potential dependence. Return a clobber.
628 return MemDepResult::getClobber(Inst);
629 }
630 }
631
632 // No dependence found. If this is the entry block of the function, it is
633 // unknown, otherwise it is non-local.
634 if (BB != &BB->getParent()->getEntryBlock())
635 return MemDepResult::getNonLocal();
636 return MemDepResult::getNonFuncLocal();
637}
638
639MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
640 ClobberOffsets.clear();
641 Instruction *ScanPos = QueryInst;
642
643 // Check for a cached result
644 MemDepResult &LocalCache = LocalDeps[QueryInst];
645
646 // If the cached entry is non-dirty, just return it. Note that this depends
647 // on MemDepResult's default constructing to 'dirty'.
648 if (!LocalCache.isDirty())
649 return LocalCache;
650
651 // Otherwise, if we have a dirty entry, we know we can start the scan at that
652 // instruction, which may save us some work.
653 if (Instruction *Inst = LocalCache.getInst()) {
654 ScanPos = Inst;
655
656 RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
657 }
658
659 BasicBlock *QueryParent = QueryInst->getParent();
660
661 // Do the scan.
662 if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
663 // No dependence found. If this is the entry block of the function, it is
664 // unknown, otherwise it is non-local.
665 if (QueryParent != &QueryParent->getParent()->getEntryBlock())
666 LocalCache = MemDepResult::getNonLocal();
667 else
668 LocalCache = MemDepResult::getNonFuncLocal();
669 } else {
670 MemoryLocation MemLoc;
671 ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
672 if (MemLoc.Ptr) {
673 // If we can do a pointer scan, make it happen.
674 bool isLoad = !isModSet(MR);
675 if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
676 isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
677
678 LocalCache =
679 getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
680 QueryParent, QueryInst, nullptr);
681 } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
682 bool isReadOnly = AA.onlyReadsMemory(QueryCall);
683 LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
684 ScanPos->getIterator(), QueryParent);
685 } else
686 // Non-memory instruction.
687 LocalCache = MemDepResult::getUnknown();
688 }
689
690 // Remember the result!
691 if (Instruction *I = LocalCache.getInst())
692 ReverseLocalDeps[I].insert(QueryInst);
693
694 return LocalCache;
695}
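// Typical handling of the result returned above by a client (illustrative
// sketch only, not part of this file):
//
//   MemDepResult D = MD.getDependency(QueryInst);
//   if (D.isDef() || D.isClobber()) {
//     Instruction *DepInst = D.getInst();  // same-block dependency
//   } else if (D.isNonLocal()) {
//     // follow up with getNonLocalPointerDependency or
//     // getNonLocalCallDependency
//   } else {
//     // unknown: no useful dependency information is available
//   }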
696
697#ifndef NDEBUG
698/// This method is used when -debug is specified to verify that cache arrays
699/// are properly kept sorted.
700static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
701 int Count = -1) {
702 if (Count == -1)
703 Count = Cache.size();
704 assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
705 "Cache isn't sorted!");
706}
707#endif
708
709const MemoryDependenceResults::NonLocalDepInfo &
710MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
711 assert(getDependency(QueryCall).isNonLocal() &&
712 "getNonLocalCallDependency should only be used on calls with "
713 "non-local deps!");
714 PerInstNLInfo &CacheP = NonLocalDepsMap[QueryCall];
715 NonLocalDepInfo &Cache = CacheP.first;
716
717 // This is the set of blocks that need to be recomputed. In the cached case,
718 // this can happen due to instructions being deleted etc. In the uncached
719 // case, this starts out as the set of predecessors we care about.
720 SmallVector<BasicBlock *, 32> DirtyBlocks;
721
722 if (!Cache.empty()) {
723 // Okay, we have a cache entry. If we know it is not dirty, just return it
724 // with no computation.
725 if (!CacheP.second) {
726 ++NumCacheNonLocal;
727 return Cache;
728 }
729
730 // If we already have a partially computed set of results, scan them to
731 // determine what is dirty, seeding our initial DirtyBlocks worklist.
732 for (auto &Entry : Cache)
733 if (Entry.getResult().isDirty())
734 DirtyBlocks.push_back(Entry.getBB());
735
736 // Sort the cache so that we can do fast binary search lookups below.
737 llvm::sort(Cache);
738
739 ++NumCacheDirtyNonLocal;
740 } else {
741 // Seed DirtyBlocks with each of the preds of QueryInst's block.
742 BasicBlock *QueryBB = QueryCall->getParent();
743 append_range(DirtyBlocks, PredCache.get(QueryBB));
744 ++NumUncacheNonLocal;
745 }
746
747 // isReadonlyCall - If this is a read-only call, we can be more aggressive.
748 bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);
749
751
752 unsigned NumSortedEntries = Cache.size();
753 LLVM_DEBUG(AssertSorted(Cache));
754
755 // Iterate while we still have blocks to update.
756 while (!DirtyBlocks.empty()) {
757 BasicBlock *DirtyBB = DirtyBlocks.pop_back_val();
758
759 // Already processed this block?
760 if (!Visited.insert(DirtyBB).second)
761 continue;
762
763 // Do a binary search to see if we already have an entry for this block in
764 // the cache set. If so, find it.
765 LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
766 NonLocalDepInfo::iterator Entry =
767 std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
768 NonLocalDepEntry(DirtyBB));
769 if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
770 --Entry;
771
772 NonLocalDepEntry *ExistingResult = nullptr;
773 if (Entry != Cache.begin() + NumSortedEntries &&
774 Entry->getBB() == DirtyBB) {
775 // If we already have an entry, and if it isn't already dirty, the block
776 // is done.
777 if (!Entry->getResult().isDirty())
778 continue;
779
780 // Otherwise, remember this slot so we can update the value.
781 ExistingResult = &*Entry;
782 }
783
784 // If the dirty entry has a pointer, start scanning from it so we don't have
785 // to rescan the entire block.
786 BasicBlock::iterator ScanPos = DirtyBB->end();
787 if (ExistingResult) {
788 if (Instruction *Inst = ExistingResult->getResult().getInst()) {
789 ScanPos = Inst->getIterator();
790 // We're removing QueryInst's use of Inst.
791 RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
792 QueryCall);
793 }
794 }
795
796 // Find out if this block has a local dependency for QueryInst.
797 MemDepResult Dep;
798
799 if (ScanPos != DirtyBB->begin()) {
800 Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
801 } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
802 // No dependence found. If this is the entry block of the function, it is
803 // a clobber, otherwise it is unknown.
804 Dep = MemDepResult::getNonLocal();
805 } else {
806 Dep = MemDepResult::getNonFuncLocal();
807 }
808
809 // If we had a dirty entry for the block, update it. Otherwise, just add
810 // a new entry.
811 if (ExistingResult)
812 ExistingResult->setResult(Dep);
813 else
814 Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
815
816 // If the block has a dependency (i.e. it isn't completely transparent to
817 // the value), remember the association!
818 if (!Dep.isNonLocal()) {
819 // Keep the ReverseNonLocalDeps map up to date so we can efficiently
820 // update this when we remove instructions.
821 if (Instruction *Inst = Dep.getInst())
822 ReverseNonLocalDeps[Inst].insert(QueryCall);
823 } else {
824
825 // If the block *is* completely transparent to the load, we need to check
826 // the predecessors of this block. Add them to our worklist.
827 append_range(DirtyBlocks, PredCache.get(DirtyBB));
828 }
829 }
830
831 return Cache;
832}
833
834void MemoryDependenceResults::getNonLocalPointerDependency(
835 Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
836 const MemoryLocation Loc = MemoryLocation::get(QueryInst);
837 bool isLoad = isa<LoadInst>(QueryInst);
838 BasicBlock *FromBB = QueryInst->getParent();
839 assert(FromBB);
840
841 assert(Loc.Ptr->getType()->isPointerTy() &&
842 "Can't get pointer deps of a non-pointer!");
843 Result.clear();
844 {
845 // Check if there is cached Def with invariant.group.
846 auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
847 if (NonLocalDefIt != NonLocalDefsCache.end()) {
848 Result.push_back(NonLocalDefIt->second);
849 ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
850 .erase(QueryInst);
851 NonLocalDefsCache.erase(NonLocalDefIt);
852 return;
853 }
854 }
855 // This routine does not expect to deal with volatile instructions.
856 // Doing so would require piping the QueryInst all the way through.
857 // TODO: volatiles can't be elided, but they can be reordered with other
858 // non-volatile accesses.
859
860 // We currently give up on any instruction which is ordered, but we do handle
861 // atomic instructions which are unordered.
862 // TODO: Handle ordered instructions
863 auto isOrdered = [](Instruction *Inst) {
864 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
865 return !LI->isUnordered();
866 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
867 return !SI->isUnordered();
868 }
869 return false;
870 };
871 if (QueryInst->isVolatile() || isOrdered(QueryInst)) {
872 Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
873 const_cast<Value *>(Loc.Ptr)));
874 return;
875 }
876 const DataLayout &DL = FromBB->getDataLayout();
877 PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
878
879 // This is the set of blocks we've inspected, and the pointer we consider in
880 // each block. Because of critical edges, we currently bail out if querying
881 // a block with multiple different pointers. This can happen during PHI
882 // translation.
883 SmallDenseMap<BasicBlock *, Value *, 16> Visited;
884 if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
885 Result, Visited, true))
886 return;
887 Result.clear();
888 Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
889 const_cast<Value *>(Loc.Ptr)));
890}
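// Sketch of how a client consumes the results produced above (illustrative
// only, not part of this file):
//
//   SmallVector<NonLocalDepResult, 16> Deps;
//   MD.getNonLocalPointerDependency(LI, Deps);
//   for (const NonLocalDepResult &D : Deps) {
//     BasicBlock *DepBB = D.getBB();     // block the result applies to
//     MemDepResult R = D.getResult();    // Def, Clobber, or Unknown there
//     Value *Addr = D.getAddress();      // possibly PHI-translated address
//   }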
891
892/// Compute the memdep value for BB with Pointer/PointeeSize using either
893/// cached information in Cache or by doing a lookup (which may use dirty cache
894/// info if available).
895///
896/// If we do a lookup, add the result to the cache.
897MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
898 Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
899 BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries,
900 BatchAAResults &BatchAA) {
901
902 bool isInvariantLoad = false;
903
904 if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
905 isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
906
907 // Do a binary search to see if we already have an entry for this block in
908 // the cache set. If so, find it.
909 NonLocalDepInfo::iterator Entry = std::upper_bound(
910 Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
911 if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
912 --Entry;
913
914 NonLocalDepEntry *ExistingResult = nullptr;
915 if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
916 ExistingResult = &*Entry;
917
918 // Use the cached result for an invariant load only if there is no dependency
919 // for the corresponding non-invariant load. In that case the invariant load
920 // cannot have any dependency either.
921 if (ExistingResult && isInvariantLoad &&
922 !ExistingResult->getResult().isNonFuncLocal())
923 ExistingResult = nullptr;
924
925 // If we have a cached entry, and it is non-dirty, use it as the value for
926 // this dependency.
927 if (ExistingResult && !ExistingResult->getResult().isDirty()) {
928 ++NumCacheNonLocalPtr;
929 return ExistingResult->getResult();
930 }
931
932 // Otherwise, we have to scan for the value. If we have a dirty cache
933 // entry, start scanning from its position, otherwise we scan from the end
934 // of the block.
935 BasicBlock::iterator ScanPos = BB->end();
936 if (ExistingResult && ExistingResult->getResult().getInst()) {
937 assert(ExistingResult->getResult().getInst()->getParent() == BB &&
938 "Instruction invalidated?");
939 ++NumCacheDirtyNonLocalPtr;
940 ScanPos = ExistingResult->getResult().getInst()->getIterator();
941
942 // Eliminating the dirty entry from 'Cache', so update the reverse info.
943 ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
944 RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
945 } else {
946 ++NumUncacheNonLocalPtr;
947 }
948
949 // Scan the block for the dependency.
950 MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
951 QueryInst, nullptr, BatchAA);
952
953 // Don't cache results for invariant load.
954 if (isInvariantLoad)
955 return Dep;
956
957 // If we had a dirty entry for the block, update it. Otherwise, just add
958 // a new entry.
959 if (ExistingResult)
960 ExistingResult->setResult(Dep);
961 else
962 Cache->push_back(NonLocalDepEntry(BB, Dep));
963
964 // If the block has a dependency (i.e. it isn't completely transparent to
965 // the value), remember the reverse association because we just added it
966 // to Cache!
967 if (!Dep.isLocal())
968 return Dep;
969
970 // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
971 // update MemDep when we remove instructions.
972 Instruction *Inst = Dep.getInst();
973 assert(Inst && "Didn't depend on anything?");
974 ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
975 ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
976 return Dep;
977}
978
979/// Sort the NonLocalDepInfo cache, given a certain number of elements in the
980/// array that are already properly ordered.
981///
982/// This is optimized for the case when only a few entries are added.
983static void
984SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
985 unsigned NumSortedEntries) {
986
987 // If only one entry, don't sort.
988 if (Cache.size() < 2)
989 return;
990
991 unsigned s = Cache.size() - NumSortedEntries;
992
993 // If the cache is already sorted, don't sort it again.
994 if (s == 0)
995 return;
996
997 // If no entry is sorted, sort the whole cache.
998 if (NumSortedEntries == 0) {
999 llvm::sort(Cache);
1000 return;
1001 }
1002
1003 // If the number of unsorted entries is small and the cache size is big, using
1004 // insertion sort is faster. Here we use Log2_32 to quickly choose the sort
1005 // method.
1006 if (s < Log2_32(Cache.size())) {
1007 while (s > 0) {
1008 NonLocalDepEntry Val = Cache.back();
1009 Cache.pop_back();
1010 MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1011 std::upper_bound(Cache.begin(), Cache.end() - s + 1, Val);
1012 Cache.insert(Entry, Val);
1013 s--;
1014 }
1015 } else {
1016 llvm::sort(Cache);
1017 }
1018}
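// Worked example for the heuristic above (illustrative): with 1021 already
// sorted entries and 3 newly appended ones, s == 3 < Log2_32(1024) == 10, so
// the 3 new entries are placed by binary-search insertion rather than
// re-sorting the whole vector.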
1019
1020/// Perform a dependency query based on pointer/pointeesize starting at the end
1021/// of StartBB.
1022///
1023/// Add any clobber/def results to the results vector and keep track of which
1024/// blocks are visited in 'Visited'.
1025///
1026/// This has special behavior for the first block queries (when SkipFirstBlock
1027/// is true). In this special case, it ignores the contents of the specified
1028/// block and starts returning dependence info for its predecessors.
1029///
1030/// This function returns true on success, or false to indicate that it could
1031/// not compute dependence information for some reason. This should be treated
1032/// as a clobber dependence on the first instruction in the predecessor block.
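///
/// Illustrative example of the PHI translation performed below (sketch): if
/// the queried address is %a = phi ptr [ %p, %bb1 ], [ %q, %bb2 ], the scan
/// into %bb1 continues with the pointer %p and the scan into %bb2 with %q.
/// If a predecessor's translated pointer conflicts with an earlier visit of
/// that block, the query is conservatively abandoned.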
1033bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1034 Instruction *QueryInst, const PHITransAddr &Pointer,
1035 const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
1036 SmallVectorImpl<NonLocalDepResult> &Result,
1037 SmallDenseMap<BasicBlock *, Value *, 16> &Visited, bool SkipFirstBlock,
1038 bool IsIncomplete) {
1039 // Look up the cached info for Pointer.
1040 ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1041
1042 // Set up a temporary NLPI value. If the map doesn't yet have an entry for
1043 // CacheKey, this value will be inserted as the associated value. Otherwise,
1044 // it'll be ignored, and we'll have to check to see if the cached size and
1045 // aa tags are consistent with the current query.
1046 NonLocalPointerInfo InitialNLPI;
1047 InitialNLPI.Size = Loc.Size;
1048 InitialNLPI.AATags = Loc.AATags;
1049
1050 bool isInvariantLoad = false;
1051 if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
1052 isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
1053
1054 // Get the NLPI for CacheKey, inserting one into the map if it doesn't
1055 // already have one.
1056 std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1057 NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
1058 NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1059
1060 // If we already have a cache entry for this CacheKey, we may need to do some
1061 // work to reconcile the cache entry and the current query.
1062 // Invariant loads don't participate in caching. Thus no need to reconcile.
1063 if (!isInvariantLoad && !Pair.second) {
1064 if (CacheInfo->Size != Loc.Size) {
1065 // The query's Size is not equal to the cached one. Throw out the cached
1066 // data and proceed with the query with the new size.
1067 CacheInfo->Pair = BBSkipFirstBlockPair();
1068 CacheInfo->Size = Loc.Size;
1069 for (auto &Entry : CacheInfo->NonLocalDeps)
1070 if (Instruction *Inst = Entry.getResult().getInst())
1071 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1072 CacheInfo->NonLocalDeps.clear();
1073 // The cache is cleared (in the above line) so we will have lost
1074 // information about blocks we have already visited. We therefore must
1075 // assume that the cache information is incomplete.
1076 IsIncomplete = true;
1077 }
1078
1079 // If the query's AATags are inconsistent with the cached one,
1080 // conservatively throw out the cached data and restart the query with
1081 // no tag if needed.
1082 if (CacheInfo->AATags != Loc.AATags) {
1083 if (CacheInfo->AATags) {
1084 CacheInfo->Pair = BBSkipFirstBlockPair();
1085 CacheInfo->AATags = AAMDNodes();
1086 for (auto &Entry : CacheInfo->NonLocalDeps)
1087 if (Instruction *Inst = Entry.getResult().getInst())
1088 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1089 CacheInfo->NonLocalDeps.clear();
1090 // The cache is cleared (in the above line) so we will have lost
1091 // information about blocks we have already visited. We therefore must
1092 // assume that the cache information is incomplete.
1093 IsIncomplete = true;
1094 }
1095 if (Loc.AATags)
1096 return getNonLocalPointerDepFromBB(
1097 QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
1098 Visited, SkipFirstBlock, IsIncomplete);
1099 }
1100 }
1101
1102 NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1103
1104 // If we have valid cached information for exactly the block we are
1105 // investigating, just return it with no recomputation.
1106 // Don't use cached information for invariant loads since it is valid for
1107 // non-invariant loads only.
1108 if (!IsIncomplete && !isInvariantLoad &&
1109 CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1110 // We have a fully cached result for this query, so we can just return the
1111 // cached results and populate the visited set. However, we have to verify
1112 // that we don't already have conflicting results for these blocks. Check
1113 // to ensure that if a block in the results set is in the visited set, it
1114 // was visited for the same pointer query.
1115 if (!Visited.empty()) {
1116 for (auto &Entry : *Cache) {
1117 SmallDenseMap<BasicBlock *, Value *, 16>::iterator VI =
1118 Visited.find(Entry.getBB());
1119 if (VI == Visited.end() || VI->second == Pointer.getAddr())
1120 continue;
1121
1122 // We have a pointer mismatch in a block. Just return false, saying
1123 // that something was clobbered in this result. We could also do a
1124 // non-fully cached query, but there is little point in doing this.
1125 return false;
1126 }
1127 }
1128
1129 Value *Addr = Pointer.getAddr();
1130 for (auto &Entry : *Cache) {
1131 Visited.insert(std::make_pair(Entry.getBB(), Addr));
1132 if (Entry.getResult().isNonLocal()) {
1133 continue;
1134 }
1135
1136 if (DT.isReachableFromEntry(Entry.getBB())) {
1137 Result.push_back(
1138 NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
1139 }
1140 }
1141 ++NumCacheCompleteNonLocalPtr;
1142 return true;
1143 }
1144
1145 // Otherwise, either this is a new block, a block with an invalid cache
1146 // pointer or one that we're about to invalidate by putting more info into
1147 // it than its valid cache info. If empty and not explicitly indicated as
1148 // incomplete, the result will be valid cache info, otherwise it isn't.
1149 //
1150 // Invariant loads don't affect cache in any way thus no need to update
1151 // CacheInfo as well.
1152 if (!isInvariantLoad) {
1153 if (!IsIncomplete && Cache->empty())
1154 CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1155 else
1156 CacheInfo->Pair = BBSkipFirstBlockPair();
1157 }
1158
1159 SmallVector<BasicBlock *, 32> Worklist;
1160 Worklist.push_back(StartBB);
1161
1162 // PredList used inside loop.
1163 SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;
1164
1165 // Keep track of the entries that we know are sorted. Previously cached
1166 // entries will all be sorted. The entries we add we only sort on demand (we
1167 // don't insert every element into its sorted position). We know that we
1168 // won't get any reuse from currently inserted values, because we don't
1169 // revisit blocks after we insert info for them.
1170 unsigned NumSortedEntries = Cache->size();
1171 unsigned WorklistEntries = BlockNumberLimit;
1172 bool GotWorklistLimit = false;
1173 LLVM_DEBUG(AssertSorted(*Cache));
1174
1175 BatchAAResults BatchAA(AA, &EEA);
1176 while (!Worklist.empty()) {
1177 BasicBlock *BB = Worklist.pop_back_val();
1178
1179 // If we do process a large number of blocks it becomes very expensive and
1180 // is likely not worth worrying about.
1181 if (Result.size() > NumResultsLimit) {
1182 // Sort it now (if needed) so that recursive invocations of
1183 // getNonLocalPointerDepFromBB and other routines that could reuse the
1184 // cache value will only see properly sorted cache arrays.
1185 if (Cache && NumSortedEntries != Cache->size()) {
1186 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1187 }
1188 // Since we bail out, the "Cache" set won't contain all of the
1189 // results for the query. This is ok (we can still use it to accelerate
1190 // specific block queries) but we can't do the fastpath "return all
1191 // results from the set". Clear out the indicator for this.
1192 CacheInfo->Pair = BBSkipFirstBlockPair();
1193 return false;
1194 }
1195
1196 // Skip the first block if we have it.
1197 if (!SkipFirstBlock) {
1198 // Analyze the dependency of *Pointer in FromBB. See if we already have
1199 // been here.
1200 assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
1201
1202 // Get the dependency info for Pointer in BB. If we have cached
1203 // information, we will use it, otherwise we compute it.
1204 LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
1205 MemDepResult Dep = getNonLocalInfoForBlock(
1206 QueryInst, Loc, isLoad, BB, Cache, NumSortedEntries, BatchAA);
1207
1208 // If we got a Def or Clobber, add this to the list of results.
1209 if (!Dep.isNonLocal()) {
1210 if (DT.isReachableFromEntry(BB)) {
1211 Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
1212 continue;
1213 }
1214 }
1215 }
1216
1217 // If 'Pointer' is an instruction defined in this block, then we need to do
1218 // phi translation to change it into a value live in the predecessor block.
1219 // If not, we just add the predecessors to the worklist and scan them with
1220 // the same Pointer.
1221 if (!Pointer.needsPHITranslationFromBlock(BB)) {
1222 SkipFirstBlock = false;
1223 SmallVector<BasicBlock *, 16> NewBlocks;
1224 for (BasicBlock *Pred : PredCache.get(BB)) {
1225 // Verify that we haven't looked at this block yet.
1226 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1227 Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
1228 if (InsertRes.second) {
1229 // First time we've looked at *PI.
1230 NewBlocks.push_back(Pred);
1231 continue;
1232 }
1233
1234 // If we have seen this block before, but it was with a different
1235 // pointer then we have a phi translation failure and we have to treat
1236 // this as a clobber.
1237 if (InsertRes.first->second != Pointer.getAddr()) {
1238 // Make sure to clean up the Visited map before continuing on to
1239 // PredTranslationFailure.
1240 for (auto *NewBlock : NewBlocks)
1241 Visited.erase(NewBlock);
1242 goto PredTranslationFailure;
1243 }
1244 }
1245 if (NewBlocks.size() > WorklistEntries) {
1246 // Make sure to clean up the Visited map before continuing on to
1247 // PredTranslationFailure.
1248 for (auto *NewBlock : NewBlocks)
1249 Visited.erase(NewBlock);
1250 GotWorklistLimit = true;
1251 goto PredTranslationFailure;
1252 }
1253 WorklistEntries -= NewBlocks.size();
1254 Worklist.append(NewBlocks.begin(), NewBlocks.end());
1255 continue;
1256 }
1257
1258 // We do need to do phi translation; if we know ahead of time that we can't
1259 // phi translate this value, don't even try.
1260 if (!Pointer.isPotentiallyPHITranslatable())
1261 goto PredTranslationFailure;
1262
1263 // We may have added values to the cache list before this PHI translation.
1264 // If so, we haven't done anything to ensure that the cache remains sorted.
1265 // Sort it now (if needed) so that recursive invocations of
1266 // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1267 // value will only see properly sorted cache arrays.
1268 if (Cache && NumSortedEntries != Cache->size()) {
1269 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1270 NumSortedEntries = Cache->size();
1271 }
1272 Cache = nullptr;
1273
1274 PredList.clear();
1275 for (BasicBlock *Pred : PredCache.get(BB)) {
1276 PredList.push_back(std::make_pair(Pred, Pointer));
1277
1278 // Get the PHI translated pointer in this predecessor. This can fail if
1279 // not translatable, in which case the getAddr() returns null.
1280 PHITransAddr &PredPointer = PredList.back().second;
1281 Value *PredPtrVal =
1282 PredPointer.translateValue(BB, Pred, &DT, /*MustDominate=*/false);
1283
1284 // Check to see if we have already visited this pred block with another
1285 // pointer. If so, we can't do this lookup. This failure can occur
1286 // with PHI translation when a critical edge exists and the PHI node in
1287 // the successor translates to a pointer value different than the
1288 // pointer the block was first analyzed with.
1289 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1290 Visited.insert(std::make_pair(Pred, PredPtrVal));
1291
1292 if (!InsertRes.second) {
1293 // We found the pred; take it off the list of preds to visit.
1294 PredList.pop_back();
1295
1296 // If the predecessor was visited with PredPtr, then we already did
1297 // the analysis and can ignore it.
1298 if (InsertRes.first->second == PredPtrVal)
1299 continue;
1300
1301 // Otherwise, the block was previously analyzed with a different
1302 // pointer. We can't represent the result of this case, so we just
1303 // treat this as a phi translation failure.
1304
1305 // Make sure to clean up the Visited map before continuing on to
1306 // PredTranslationFailure.
1307 for (const auto &Pred : PredList)
1308 Visited.erase(Pred.first);
1309
1310 goto PredTranslationFailure;
1311 }
1312 }
1313
1314 // Actually process results here; this needs to be a separate loop to avoid
1315 // calling getNonLocalPointerDepFromBB for blocks we don't want to return
1316 // any results for. (getNonLocalPointerDepFromBB will modify our
1317 // datastructures in ways the code after the PredTranslationFailure label
1318 // doesn't expect.)
1319 for (auto &I : PredList) {
1320 BasicBlock *Pred = I.first;
1321 PHITransAddr &PredPointer = I.second;
1322 Value *PredPtrVal = PredPointer.getAddr();
1323
1324 bool CanTranslate = true;
1325 // If PHI translation was unable to find an available pointer in this
1326 // predecessor, then we have to assume that the pointer is clobbered in
1327 // that predecessor. We can still do PRE of the load, which would insert
1328 // a computation of the pointer in this predecessor.
1329 if (!PredPtrVal)
1330 CanTranslate = false;
1331
1332 // FIXME: it is entirely possible that PHI translating will end up with
1333 // the same value. Consider PHI translating something like:
1334 // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
1335 // to recurse here, pedantically speaking.
1336
1337 // If getNonLocalPointerDepFromBB fails here, that means the cached
1338 // result conflicted with the Visited list; we have to conservatively
1339 // assume it is unknown, but this also does not block PRE of the load.
1340 if (!CanTranslate ||
1341 !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1342 Loc.getWithNewPtr(PredPtrVal), isLoad,
1343 Pred, Result, Visited)) {
1344 // Add the entry to the Result list.
1345 NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
1346 Result.push_back(Entry);
1347
1348 // Since we had a phi translation failure, the cache for CacheKey won't
1349 // include all of the entries that we need to immediately satisfy future
1350 // queries. Mark this in NonLocalPointerDeps by setting the
1351 // BBSkipFirstBlockPair pointer to null. This requires reuse of the
1352 // cached value to do more work but not miss the phi trans failure.
1353 NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1354 NLPI.Pair = BBSkipFirstBlockPair();
1355 continue;
1356 }
1357 }
1358
1359 // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1360 CacheInfo = &NonLocalPointerDeps[CacheKey];
1361 Cache = &CacheInfo->NonLocalDeps;
1362 NumSortedEntries = Cache->size();
1363
1364 // Since we did phi translation, the "Cache" set won't contain all of the
1365 // results for the query. This is ok (we can still use it to accelerate
1366 // specific block queries) but we can't do the fastpath "return all
1367 // results from the set" Clear out the indicator for this.
1368 CacheInfo->Pair = BBSkipFirstBlockPair();
1369 SkipFirstBlock = false;
1370 continue;
1371
1372 PredTranslationFailure:
1373 // The following code is "failure"; we can't produce a sane translation
1374 // for the given block. It assumes that we haven't modified any of
1375 // our datastructures while processing the current block.
1376
1377 if (!Cache) {
1378 // Refresh the CacheInfo/Cache pointer if it got invalidated.
1379 CacheInfo = &NonLocalPointerDeps[CacheKey];
1380 Cache = &CacheInfo->NonLocalDeps;
1381 NumSortedEntries = Cache->size();
1382 }
1383
1384 // Since we failed phi translation, the "Cache" set won't contain all of the
1385 // results for the query. This is ok (we can still use it to accelerate
1386 // specific block queries) but we can't do the fastpath "return all
1387 // results from the set". Clear out the indicator for this.
1388 CacheInfo->Pair = BBSkipFirstBlockPair();
1389
1390 // If *nothing* works, mark the pointer as unknown.
1391 //
1392 // If this is the magic first block, return this as a clobber of the whole
1393 // incoming value. Since we can't phi translate to one of the predecessors,
1394 // we have to bail out.
1395 if (SkipFirstBlock)
1396 return false;
1397
1398 // Results of invariant loads are not cached thus no need to update cached
1399 // information.
1400 if (!isInvariantLoad) {
1401 for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
1402 if (I.getBB() != BB)
1403 continue;
1404
1405 assert((GotWorklistLimit || I.getResult().isNonLocal() ||
1406 !DT.isReachableFromEntry(BB)) &&
1407 "Should only be here with transparent block");
1408
1409 I.setResult(MemDepResult::getUnknown());
1410
1411
1412 break;
1413 }
1414 }
1415 (void)GotWorklistLimit;
1416 // Go ahead and report unknown dependence.
1417 Result.push_back(
1418 NonLocalDepResult(BB, MemDepResult::getUnknown(), Pointer.getAddr()));
1419 }
1420
1421 // Okay, we're done now. If we added new values to the cache, re-sort it.
1422 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1423 LLVM_DEBUG(AssertSorted(*Cache));
1424 return true;
1425}
1426
1427/// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
1428void MemoryDependenceResults::removeCachedNonLocalPointerDependencies(
1429 ValueIsLoadPair P) {
1430
1431 // Most of the time this cache is empty.
1432 if (!NonLocalDefsCache.empty()) {
1433 auto it = NonLocalDefsCache.find(P.getPointer());
1434 if (it != NonLocalDefsCache.end()) {
1435 RemoveFromReverseMap(ReverseNonLocalDefsCache,
1436 it->second.getResult().getInst(), P.getPointer());
1437 NonLocalDefsCache.erase(it);
1438 }
1439
1440 if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
1441 auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
1442 if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
1443 for (const auto *entry : toRemoveIt->second)
1444 NonLocalDefsCache.erase(entry);
1445 ReverseNonLocalDefsCache.erase(toRemoveIt);
1446 }
1447 }
1448 }
1449
1450 CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
1451 if (It == NonLocalPointerDeps.end())
1452 return;
1453
1454 // Remove all of the entries in the BB->val map. This involves removing
1455 // instructions from the reverse map.
1456 NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
1457
1458 for (const NonLocalDepEntry &DE : PInfo) {
1459 Instruction *Target = DE.getResult().getInst();
1460 if (!Target)
1461 continue; // Ignore non-local dep results.
1462 assert(Target->getParent() == DE.getBB());
1463
1464 // Eliminating the dirty entry from 'Cache', so update the reverse info.
1465 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1466 }
1467
1468 // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1469 NonLocalPointerDeps.erase(It);
1470}
1471
1472void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
1473 // If Ptr isn't really a pointer, just ignore it.
1474 if (!Ptr->getType()->isPointerTy())
1475 return;
1476 // Flush store info for the pointer.
1477 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1478 // Flush load info for the pointer.
1479 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1480}
1481
1482void MemoryDependenceResults::invalidateCachedPredecessors() {
1483 PredCache.clear();
1484}
1485
1486void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
1487 EEA.removeInstruction(RemInst);
1488
1489 // Walk through the Non-local dependencies, removing this one as the value
1490 // for any cached queries.
1491 NonLocalDepMapType::iterator NLDI = NonLocalDepsMap.find(RemInst);
1492 if (NLDI != NonLocalDepsMap.end()) {
1493 NonLocalDepInfo &BlockMap = NLDI->second.first;
1494 for (auto &Entry : BlockMap)
1495 if (Instruction *Inst = Entry.getResult().getInst())
1496 RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1497 NonLocalDepsMap.erase(NLDI);
1498 }
1499
1500 // If we have a cached local dependence query for this instruction, remove it.
1501 LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1502 if (LocalDepEntry != LocalDeps.end()) {
1503 // Remove us from DepInst's reverse set now that the local dep info is gone.
1504 if (Instruction *Inst = LocalDepEntry->second.getInst())
1505 RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1506
1507 // Remove this local dependency info.
1508 LocalDeps.erase(LocalDepEntry);
1509 }
1510
1511 // If we have any cached dependencies on this instruction, remove
1512 // them.
1513
1514 // If the instruction is a pointer, remove it from both the load info and the
1515 // store info.
1516 if (RemInst->getType()->isPointerTy()) {
1517 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1518 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1519 } else {
1520 // Otherwise, if the instruction is in the map directly, it must be a load.
1521 // Remove it.
1522 auto toRemoveIt = NonLocalDefsCache.find(RemInst);
1523 if (toRemoveIt != NonLocalDefsCache.end()) {
1524 assert(isa<LoadInst>(RemInst) &&
1525 "only load instructions should be added directly");
1526 const Instruction *DepV = toRemoveIt->second.getResult().getInst();
1527 ReverseNonLocalDefsCache.find(DepV)->second.erase(RemInst);
1528 NonLocalDefsCache.erase(toRemoveIt);
1529 }
1530 }
1531
1532 // Loop over all of the things that depend on the instruction we're removing.
1533 SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;
1534
1535 // If we find RemInst as a clobber or Def in any of the maps for other values,
1536 // we need to replace its entry with a dirty version of the instruction after
1537 // it. If RemInst is a terminator, we use a null dirty value.
1538 //
1539 // Using a dirty version of the instruction after RemInst saves having to scan
1540 // the entire block to get to this point.
1541 MemDepResult NewDirtyVal;
1542 if (!RemInst->isTerminator())
1543 NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
1544
1545 ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1546 if (ReverseDepIt != ReverseLocalDeps.end()) {
1547 // RemInst can't be the terminator if it has local stuff depending on it.
1548 assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
1549 "Nothing can locally depend on a terminator");
1550
1551 for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
1552 assert(InstDependingOnRemInst != RemInst &&
1553 "Already removed our local dep info");
1554
1555 LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1556
1557 // Make sure to remember that new things depend on NewDirtyVal's instruction.
1558 assert(NewDirtyVal.getInst() &&
1559 "There is no way something else can have "
1560 "a local dep on this if it is a terminator!");
1561 ReverseDepsToAdd.push_back(
1562 std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
1563 }
1564
1565 ReverseLocalDeps.erase(ReverseDepIt);
1566
1567 // Add new reverse deps after scanning the set, to avoid invalidating the
1568 // 'ReverseDeps' reference.
1569 while (!ReverseDepsToAdd.empty()) {
1570 ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
1571 ReverseDepsToAdd.back().second);
1572 ReverseDepsToAdd.pop_back();
1573 }
1574 }
1575
1576 ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1577 if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1578 for (Instruction *I : ReverseDepIt->second) {
1579 assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1580
1581 PerInstNLInfo &INLD = NonLocalDepsMap[I];
1582 // The information is now dirty!
1583 INLD.second = true;
1584
1585 for (auto &Entry : INLD.first) {
1586 if (Entry.getResult().getInst() != RemInst)
1587 continue;
1588
1589 // Convert to a dirty entry for the subsequent instruction.
1590 Entry.setResult(NewDirtyVal);
1591
1592 if (Instruction *NextI = NewDirtyVal.getInst())
1593 ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
1594 }
1595 }
1596
1597 ReverseNonLocalDeps.erase(ReverseDepIt);
1598
1599 // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
1600 while (!ReverseDepsToAdd.empty()) {
1601 ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
1602 ReverseDepsToAdd.back().second);
1603 ReverseDepsToAdd.pop_back();
1604 }
1605 }
1606
1607 // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1608 // value in the NonLocalPointerDeps info.
1609 ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1610 ReverseNonLocalPtrDeps.find(RemInst);
1611 if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1612 SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
1613 ReversePtrDepsToAdd;
1614
1615 for (ValueIsLoadPair P : ReversePtrDepIt->second) {
1616 assert(P.getPointer() != RemInst &&
1617 "Already removed NonLocalPointerDeps info for RemInst");
1618
1619 auto &NLPD = NonLocalPointerDeps[P];
1620
1621 NonLocalDepInfo &NLPDI = NLPD.NonLocalDeps;
1622
1623 // The cache is not valid for any specific block anymore.
1624 NLPD.Pair = BBSkipFirstBlockPair();
1625
1626 // Update any entries for RemInst to use the instruction after it.
1627 for (auto &Entry : NLPDI) {
1628 if (Entry.getResult().getInst() != RemInst)
1629 continue;
1630
1631 // Convert to a dirty entry for the subsequent instruction.
1632 Entry.setResult(NewDirtyVal);
1633
1634 if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1635 ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1636 }
1637
1638 // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
1639 // subsequent value may invalidate the sortedness.
1640 llvm::sort(NLPDI);
1641 }
1642
1643 ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1644
1645 while (!ReversePtrDepsToAdd.empty()) {
1646 ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
1647 ReversePtrDepsToAdd.back().second);
1648 ReversePtrDepsToAdd.pop_back();
1649 }
1650 }
1651
1652 assert(!NonLocalDepsMap.count(RemInst) && "RemInst got reinserted?");
1653 LLVM_DEBUG(verifyRemoved(RemInst));
1654}
1655
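// --- Editor's illustrative sketch (not part of the original source) ---------
// Any pass that deletes instructions while holding a MemoryDependenceResults
// must call removeInstruction first, otherwise the caches updated above would
// keep dangling pointers. The helper is hypothetical.
static void exampleDeleteDeadInstruction(MemoryDependenceResults &MD,
                                         Instruction *I) {
  assert(I->use_empty() && "sketch only handles already-dead instructions");
  MD.removeInstruction(I); // fixes up local, non-local and reverse caches
  I->eraseFromParent();
}
// ----------------------------------------------------------------------------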
1656/// Verify that the specified instruction does not occur in our internal data
1657/// structures.
1658///
1659/// This function verifies by asserting in debug builds.
1660void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
1661#ifndef NDEBUG
1662 for (const auto &DepKV : LocalDeps) {
1663 assert(DepKV.first != D && "Inst occurs in data structures");
1664 assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1665 }
1666
1667 for (const auto &DepKV : NonLocalPointerDeps) {
1668 assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1669 for (const auto &Entry : DepKV.second.NonLocalDeps)
1670 assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1671 }
1672
1673 for (const auto &DepKV : NonLocalDepsMap) {
1674 assert(DepKV.first != D && "Inst occurs in data structures");
1675 const PerInstNLInfo &INLD = DepKV.second;
1676 for (const auto &Entry : INLD.first)
1677 assert(Entry.getResult().getInst() != D &&
1678 "Inst occurs in data structures");
1679 }
1680
1681 for (const auto &DepKV : ReverseLocalDeps) {
1682 assert(DepKV.first != D && "Inst occurs in data structures");
1683 for (Instruction *Inst : DepKV.second)
1684 assert(Inst != D && "Inst occurs in data structures");
1685 }
1686
1687 for (const auto &DepKV : ReverseNonLocalDeps) {
1688 assert(DepKV.first != D && "Inst occurs in data structures");
1689 for (Instruction *Inst : DepKV.second)
1690 assert(Inst != D && "Inst occurs in data structures");
1691 }
1692
1693 for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1694 assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1695
1696 for (ValueIsLoadPair P : DepKV.second)
1697 assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1698 "Inst occurs in ReverseNonLocalPtrDeps map");
1699 }
1700#endif
1701}
1702
1703AnalysisKey MemoryDependenceAnalysis::Key;
1704
1705 MemoryDependenceAnalysis::MemoryDependenceAnalysis()
1706 : DefaultBlockScanLimit(BlockScanLimit) {}
1707
1708 MemoryDependenceResults
1709 MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
1710 auto &AA = AM.getResult<AAManager>(F);
1711 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1712 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1713 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1714 return MemoryDependenceResults(AA, AC, TLI, DT, DefaultBlockScanLimit);
1715}
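// --- Editor's illustrative sketch (not part of the original source) ---------
// Using the analysis from new-pass-manager code: ask the analysis manager for
// the MemoryDependenceResults and issue queries against it. The free function
// stands in for the body of a hypothetical pass's run() method.
static PreservedAnalyses exampleRunWithMemDep(Function &F,
                                              FunctionAnalysisManager &FAM) {
  MemoryDependenceResults &MD = FAM.getResult<MemoryDependenceAnalysis>(F);
  for (Instruction &I : F.getEntryBlock())
    if (I.mayReadFromMemory() || I.mayWriteToMemory())
      (void)MD.getDependency(&I); // local query, honours DefaultBlockScanLimit
  return PreservedAnalyses::all();
}
// ----------------------------------------------------------------------------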
1716
1717 char MemoryDependenceWrapperPass::ID = 0;
1718
1719 INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
1720 "Memory Dependence Analysis", false, true)
1721 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1722 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1723 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1724 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1725 INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
1726 "Memory Dependence Analysis", false, true)
1727
1728 MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {}
1729
1730 MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
1731
1732 void MemoryDependenceWrapperPass::releaseMemory() {
1733 MemDep.reset();
1734}
1735
1736 void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1737 AU.setPreservesAll();
1738 AU.addRequiredTransitive<AssumptionCacheTracker>();
1739 AU.addRequiredTransitive<AAResultsWrapperPass>();
1740 AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
1741 AU.addRequiredTransitive<DominatorTreeWrapperPass>();
1742}
1743
1744 bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
1745 FunctionAnalysisManager::Invalidator &Inv) {
1746 // Check whether our analysis is preserved.
1747 auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1748 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1749 // If not, give up now.
1750 return true;
1751
1752 // Check whether the analyses we depend on became invalid for any reason.
1753 if (Inv.invalidate<AAManager>(F, PA) ||
1754 Inv.invalidate<AssumptionAnalysis>(F, PA) ||
1755 Inv.invalidate<DominatorTreeAnalysis>(F, PA))
1756 return true;
1757
1758 // Otherwise this analysis result remains valid.
1759 return false;
1760}
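// --- Editor's illustrative sketch (not part of the original source) ---------
// A transformation that keeps this result up to date (via removeInstruction /
// invalidateCachedPointerInfo above) can report it as preserved, in which case
// the invalidate() logic above returns false as long as the analyses it
// depends on survive too. Hypothetical helper for illustration.
static PreservedAnalyses examplePreservedSet() {
  PreservedAnalyses PA;
  PA.preserve<MemoryDependenceAnalysis>();
  PA.preserve<DominatorTreeAnalysis>(); // one of the checked dependencies
  return PA;
}
// ----------------------------------------------------------------------------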
1761
1762 unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
1763 return DefaultBlockScanLimit;
1764}
1765
1766 bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
1767 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1768 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1769 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1770 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1771 MemDep.emplace(AA, AC, TLI, DT, BlockScanLimit);
1772 return false;
1773}
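// --- Editor's illustrative sketch (not part of the original source) ---------
// Consuming the analysis from a legacy-pass-manager pass: require the wrapper
// in getAnalysisUsage and pull the MemoryDependenceResults out of it. The pass
// below is hypothetical and assumes MemoryDependenceWrapperPass::getMemDep().
namespace {
struct ExampleLegacyMemDepUser : FunctionPass {
  static char ID;
  ExampleLegacyMemDepUser() : FunctionPass(ID) {}
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MemoryDependenceWrapperPass>();
    AU.setPreservesAll();
  }
  bool runOnFunction(Function &F) override {
    MemoryDependenceResults &MD =
        getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
    for (Instruction &I : F.getEntryBlock())
      if (I.mayReadFromMemory())
        (void)MD.getDependency(&I);
    return false; // analysis-only, nothing modified
  }
};
char ExampleLegacyMemDepUser::ID = 0;
} // end anonymous namespace
// ----------------------------------------------------------------------------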