1//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The code below implements dead store elimination using MemorySSA. It uses
10// the following general approach: given a MemoryDef, walk upwards to find
11// clobbering MemoryDefs that may be killed by the starting def. Then check
12// that there are no uses that may read the location of the original MemoryDef
13// in between both MemoryDefs. A bit more concretely:
14//
15// For all MemoryDefs StartDef:
16// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17// upwards.
18// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19// checking all uses starting at MaybeDeadAccess and walking until we see
20// StartDef.
21// 3. For each found CurrentDef, check that:
22// 1. There are no barrier instructions between CurrentDef and StartDef (like
23// throws or stores with ordering constraints).
24// 2. StartDef is executed whenever CurrentDef is executed.
25// 3. StartDef completely overwrites CurrentDef.
26// 4. Erase CurrentDef from the function and MemorySSA.
27//
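// As a minimal illustrative example (not a test from this file), consider:
//
//   store i8 0, ptr %p      ; MaybeDeadAccess
//   ...                     ; no reads of %p on any path in between
//   store i8 1, ptr %p      ; StartDef, completely overwrites the store above
//
// Here the first store is dead, and both the instruction and its MemoryDef
// are removed.
//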
28//===----------------------------------------------------------------------===//
29
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/MapVector.h"
35#include "llvm/ADT/SetVector.h"
38#include "llvm/ADT/Statistic.h"
39#include "llvm/ADT/StringRef.h"
53#include "llvm/IR/Argument.h"
55#include "llvm/IR/BasicBlock.h"
56#include "llvm/IR/Constant.h"
58#include "llvm/IR/Constants.h"
59#include "llvm/IR/DataLayout.h"
60#include "llvm/IR/DebugInfo.h"
61#include "llvm/IR/Dominators.h"
62#include "llvm/IR/Function.h"
63#include "llvm/IR/IRBuilder.h"
65#include "llvm/IR/InstrTypes.h"
66#include "llvm/IR/Instruction.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/PassManager.h"
72#include "llvm/IR/Value.h"
76#include "llvm/Support/Debug.h"
83#include <algorithm>
84#include <cassert>
85#include <cstdint>
86#include <map>
87#include <optional>
88#include <utility>
89
90using namespace llvm;
91using namespace PatternMatch;
92
93#define DEBUG_TYPE "dse"
94
95STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
96STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
97STATISTIC(NumFastStores, "Number of stores deleted");
98STATISTIC(NumFastOther, "Number of other instrs removed");
99STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
100STATISTIC(NumModifiedStores, "Number of stores modified");
101STATISTIC(NumCFGChecks, "Number of blocks checked to prove a store dead across the CFG");
102STATISTIC(NumCFGTries, "Number of attempts to prove a store dead across the CFG");
103STATISTIC(NumCFGSuccess, "Number of stores proven dead across the CFG");
104STATISTIC(NumGetDomMemoryDefPassed,
105 "Number of times a valid candidate is returned from getDomMemoryDef");
106STATISTIC(NumDomMemDefChecks,
107 "Number iterations check for reads in getDomMemoryDef");
108
109DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
110 "Controls which MemoryDefs are eliminated.");
111
112static cl::opt<bool>
113EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
114 cl::init(true), cl::Hidden,
115 cl::desc("Enable partial-overwrite tracking in DSE"));
116
117static cl::opt<bool>
118EnablePartialStoreMerging("enable-dse-partial-store-merging",
119 cl::init(true), cl::Hidden,
120 cl::desc("Enable partial store merging in DSE"));
121
122static cl::opt<unsigned>
123 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
124 cl::desc("The number of memory instructions to scan for "
125 "dead store elimination (default = 150)"));
126static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
127 "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
128 cl::desc("The maximum number of steps while walking upwards to find "
129 "MemoryDefs that may be killed (default = 90)"));
130
131static cl::opt<unsigned> MemorySSAPartialStoreLimit(
132 "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
133 cl::desc("The maximum number of candidates that only partially overwrite the "
134 "killing MemoryDef to consider"
135 " (default = 5)"));
136
137static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
138 "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
139 cl::desc("The number of MemoryDefs we consider as candidates to eliminate "
140 "other stores per basic block (default = 5000)"));
141
142static cl::opt<unsigned> MemorySSASameBBStepCost(
143 "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
144 cl::desc(
145 "The cost of a step in the same basic block as the killing MemoryDef "
146 "(default = 1)"));
147
148static cl::opt<unsigned>
149 MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
150 cl::Hidden,
151 cl::desc("The cost of a step in a different basic "
152 "block than the killing MemoryDef "
153 "(default = 5)"));
154
155static cl::opt<unsigned> MemorySSAPathCheckLimit(
156 "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
157 cl::desc("The maximum number of blocks to check when trying to prove that "
158 "all paths to an exit go through a killing block (default = 50)"));
159
160// This flag allows or disallows DSE to optimize MemorySSA during its
161// traversal. Note that DSE optimizing MemorySSA may impact other passes
162// downstream of the DSE invocation and can lead to issues not being
163// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
164// those cases, the flag can be used to check if DSE's MemorySSA optimizations
165// impact follow-up passes.
166static cl::opt<bool>
167 OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
168 cl::desc("Allow DSE to optimize memory accesses."));
169
170// TODO: remove this flag.
171static cl::opt<bool> EnableInitializesImprovement(
172 "enable-dse-initializes-attr-improvement", cl::init(true), cl::Hidden,
173 cl::desc("Enable the initializes attr improvement in DSE"));
174
175//===----------------------------------------------------------------------===//
176// Helper functions
177//===----------------------------------------------------------------------===//
178using OverlapIntervalsTy = std::map<int64_t, int64_t>;
179using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
180
181/// Returns true if the end of this instruction can be safely shortened in
182/// length.
183static bool isShortenableAtTheEnd(Instruction *I) {
184 // Don't shorten stores for now
185 if (isa<StoreInst>(I))
186 return false;
187
188 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
189 switch (II->getIntrinsicID()) {
190 default: return false;
191 case Intrinsic::memset:
192 case Intrinsic::memcpy:
193 case Intrinsic::memcpy_element_unordered_atomic:
194 case Intrinsic::memset_element_unordered_atomic:
195 // Do shorten memory intrinsics.
196 // FIXME: Add memmove if it's also safe to transform.
197 return true;
198 }
199 }
200
201 // Don't shorten libcalls for now.
202
203 return false;
204}
205
206/// Returns true if the beginning of this instruction can be safely shortened
207/// in length.
208static bool isShortenableAtTheBeginning(Instruction *I) {
209 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
210 // easily done by offsetting the source address.
211 return isa<AnyMemSetInst>(I);
212}
213
214static std::optional<TypeSize> getPointerSize(const Value *V,
215 const DataLayout &DL,
216 const TargetLibraryInfo &TLI,
217 const Function *F) {
218 uint64_t Size;
219 ObjectSizeOpts Opts;
220 Opts.NullIsUnknownSize = NullPointerIsDefined(F);
221
222 if (getObjectSize(V, Size, DL, &TLI, Opts))
223 return TypeSize::getFixed(Size);
224 return std::nullopt;
225}
226
227namespace {
228
229enum OverwriteResult {
230 OW_Begin,
231 OW_Complete,
232 OW_End,
233 OW_PartialEarlierWithFullLater,
234 OW_MaybePartial,
235 OW_None,
236 OW_Unknown
237};
238
239} // end anonymous namespace
240
241/// Check if two instructions are masked stores that completely
242/// overwrite one another. More specifically, \p KillingI has to
243/// overwrite \p DeadI.
244static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
245 const Instruction *DeadI,
246 BatchAAResults &AA) {
247 const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
248 const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
249 if (KillingII == nullptr || DeadII == nullptr)
250 return OW_Unknown;
251 if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
252 return OW_Unknown;
253
254 switch (KillingII->getIntrinsicID()) {
255 case Intrinsic::masked_store:
256 case Intrinsic::vp_store: {
257 const DataLayout &DL = KillingII->getDataLayout();
258 auto *KillingTy = KillingII->getArgOperand(0)->getType();
259 auto *DeadTy = DeadII->getArgOperand(0)->getType();
260 if (DL.getTypeSizeInBits(KillingTy) != DL.getTypeSizeInBits(DeadTy))
261 return OW_Unknown;
262 // Element count.
263 if (cast<VectorType>(KillingTy)->getElementCount() !=
264 cast<VectorType>(DeadTy)->getElementCount())
265 return OW_Unknown;
266 // Pointers.
267 Value *KillingPtr = KillingII->getArgOperand(1);
268 Value *DeadPtr = DeadII->getArgOperand(1);
269 if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
270 return OW_Unknown;
271 if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
272 // Masks.
273 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
274 if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
275 return OW_Unknown;
276 } else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
277 // Masks.
278 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
279 if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
280 return OW_Unknown;
281 // Lengths.
282 if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
283 return OW_Unknown;
284 }
285 return OW_Complete;
286 }
287 default:
288 return OW_Unknown;
289 }
290}
291
292/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
293/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
294/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
295/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
296/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
297/// overwritten by a killing (smaller) store which doesn't write outside the big
298/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
299/// NOTE: This function must only be called if both \p KillingLoc and \p
300/// DeadLoc belong to the same underlying object with valid \p KillingOff and
301/// \p DeadOff.
302static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
303 const MemoryLocation &DeadLoc,
304 int64_t KillingOff, int64_t DeadOff,
305 Instruction *DeadI,
306 InstOverlapIntervalsTy &IOL) {
307 const uint64_t KillingSize = KillingLoc.Size.getValue();
308 const uint64_t DeadSize = DeadLoc.Size.getValue();
309 // We may now overlap, although the overlap is not complete. There might also
310 // be other incomplete overlaps, and together, they might cover the complete
311 // dead store.
312 // Note: The correctness of this logic depends on the fact that this function
313 // is not even called providing DepWrite when there are any intervening reads.
314 if (EnablePartialOverwriteTracking &&
315 KillingOff < int64_t(DeadOff + DeadSize) &&
316 int64_t(KillingOff + KillingSize) >= DeadOff) {
317
318 // Insert our part of the overlap into the map.
319 auto &IM = IOL[DeadI];
320 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
321 << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
322 << KillingOff << ", " << int64_t(KillingOff + KillingSize)
323 << ")\n");
324
325 // Make sure that we only insert non-overlapping intervals and combine
326 // adjacent intervals. The intervals are stored in the map with the ending
327 // offset as the key (in the half-open sense) and the starting offset as
328 // the value.
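 // For illustration (values assumed, not taken from a real run): two killing
 // stores covering [0, 8) and [8, 16) of a dead store spanning [0, 16) leave
 // IM = { 16 -> 0 } after merging, which covers the whole dead store and is
 // reported as OW_Complete below.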
329 int64_t KillingIntStart = KillingOff;
330 int64_t KillingIntEnd = KillingOff + KillingSize;
331
332 // Find any intervals ending at, or after, KillingIntStart which start
333 // before KillingIntEnd.
334 auto ILI = IM.lower_bound(KillingIntStart);
335 if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
336 // This existing interval is overlapped with the current store somewhere
337 // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
338 // intervals and adjusting our start and end.
339 KillingIntStart = std::min(KillingIntStart, ILI->second);
340 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
341 ILI = IM.erase(ILI);
342
343 // Continue erasing and adjusting our end in case other previous
344 // intervals are also overlapped with the current store.
345 //
346 // |--- dead 1 ---| |--- dead 2 ---|
347 // |------- killing---------|
348 //
349 while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
350 assert(ILI->second > KillingIntStart && "Unexpected interval");
351 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
352 ILI = IM.erase(ILI);
353 }
354 }
355
356 IM[KillingIntEnd] = KillingIntStart;
357
358 ILI = IM.begin();
359 if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
360 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
361 << DeadOff << ", " << int64_t(DeadOff + DeadSize)
362 << ") Composite KillingLoc [" << ILI->second << ", "
363 << ILI->first << ")\n");
364 ++NumCompletePartials;
365 return OW_Complete;
366 }
367 }
368
369 // Check for a dead store which writes to all the memory locations that
370 // the killing store writes to.
371 if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
372 int64_t(DeadOff + DeadSize) > KillingOff &&
373 uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
374 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite of a dead store [" << DeadOff
375 << ", " << int64_t(DeadOff + DeadSize)
376 << ") by a killing store [" << KillingOff << ", "
377 << int64_t(KillingOff + KillingSize) << ")\n");
378 // TODO: Maybe come up with a better name?
379 return OW_PartialEarlierWithFullLater;
380 }
381
382 // Another interesting case is if the killing store overwrites the end of the
383 // dead store.
384 //
385 // |--dead--|
386 // |-- killing --|
387 //
388 // In this case we may want to trim the size of dead store to avoid
389 // generating stores to addresses which will definitely be overwritten by the
390 // killing store.
391 if (!EnablePartialOverwriteTracking &&
392 (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
393 int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
394 return OW_End;
395
396 // Finally, we also need to check if the killing store overwrites the
397 // beginning of the dead store.
398 //
399 // |--dead--|
400 // |-- killing --|
401 //
402 // In this case we may want to move the destination address and trim the size
403 // of dead store to avoid generating stores to addresses which will definitely
404 // be overwritten by the killing store.
405 if (!EnablePartialOverwriteTracking &&
406 (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
407 assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
408 "Expect to be handled as OW_Complete");
409 return OW_Begin;
410 }
411 // Otherwise, they don't completely overlap.
412 return OW_Unknown;
413}
414
415/// Returns true if the memory which is accessed by the second instruction is not
416/// modified between the first and the second instruction.
417/// Precondition: Second instruction must be dominated by the first
418/// instruction.
419static bool
420memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
421 BatchAAResults &AA, const DataLayout &DL,
422 DominatorTree *DT) {
423 // Do a backwards scan through the CFG from SecondI to FirstI. Look for
424 // instructions which can modify the memory location accessed by SecondI.
425 //
426 // While doing the walk keep track of the address to check. It might be
427 // different in different basic blocks due to PHI translation.
428 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
429 SmallVector<BlockAddressPair, 16> WorkList;
430 // Keep track of the address we visited each block with. Bail out if we
431 // visit a block with different addresses.
432 DenseMap<BasicBlock *, Value *> Visited;
433
434 BasicBlock::iterator FirstBBI(FirstI);
435 ++FirstBBI;
436 BasicBlock::iterator SecondBBI(SecondI);
437 BasicBlock *FirstBB = FirstI->getParent();
438 BasicBlock *SecondBB = SecondI->getParent();
439 MemoryLocation MemLoc;
440 if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
441 MemLoc = MemoryLocation::getForDest(MemSet);
442 else
443 MemLoc = MemoryLocation::get(SecondI);
444
445 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
446
447 // Start checking the SecondBB.
448 WorkList.push_back(
449 std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
450 bool isFirstBlock = true;
451
452 // Check all blocks going backward until we reach the FirstBB.
453 while (!WorkList.empty()) {
454 BlockAddressPair Current = WorkList.pop_back_val();
455 BasicBlock *B = Current.first;
456 PHITransAddr &Addr = Current.second;
457 Value *Ptr = Addr.getAddr();
458
459 // Ignore instructions before FirstI if this is the FirstBB.
460 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
461
463 if (isFirstBlock) {
464 // Ignore instructions after SecondI if this is the first visit of SecondBB.
465 assert(B == SecondBB && "first block is not the store block");
466 EI = SecondBBI;
467 isFirstBlock = false;
468 } else {
469 // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
470 // In this case we also have to look at instructions after SecondI.
471 EI = B->end();
472 }
473 for (; BI != EI; ++BI) {
474 Instruction *I = &*BI;
475 if (I->mayWriteToMemory() && I != SecondI)
476 if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
477 return false;
478 }
479 if (B != FirstBB) {
480 assert(B != &FirstBB->getParent()->getEntryBlock() &&
481 "Should not hit the entry block because SecondI must be dominated by FirstI");
482 for (BasicBlock *Pred : predecessors(B)) {
483 PHITransAddr PredAddr = Addr;
484 if (PredAddr.needsPHITranslationFromBlock(B)) {
485 if (!PredAddr.isPotentiallyPHITranslatable())
486 return false;
487 if (!PredAddr.translateValue(B, Pred, DT, false))
488 return false;
489 }
490 Value *TranslatedPtr = PredAddr.getAddr();
491 auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
492 if (!Inserted.second) {
493 // We already visited this block before. If it was with a different
494 // address - bail out!
495 if (TranslatedPtr != Inserted.first->second)
496 return false;
497 // ... otherwise just skip it.
498 continue;
499 }
500 WorkList.push_back(std::make_pair(Pred, PredAddr));
501 }
502 }
503 }
504 return true;
505}
506
507static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
508 uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
509 uint64_t NewSizeInBits, bool IsOverwriteEnd) {
510 const DataLayout &DL = Inst->getDataLayout();
511 uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
512 uint64_t DeadSliceOffsetInBits =
513 OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
514 auto SetDeadFragExpr = [](auto *Assign,
515 DIExpression::FragmentInfo DeadFragment) {
516 // createFragmentExpression expects an offset relative to the existing
517 // fragment offset if there is one.
518 uint64_t RelativeOffset = DeadFragment.OffsetInBits -
519 Assign->getExpression()
520 ->getFragmentInfo()
521 .value_or(DIExpression::FragmentInfo(0, 0))
522 .OffsetInBits;
523 if (auto NewExpr = DIExpression::createFragmentExpression(
524 Assign->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
525 Assign->setExpression(*NewExpr);
526 return;
527 }
528 // Failed to create a fragment expression for this so discard the value,
529 // making this a kill location.
530 auto *Expr = *DIExpression::createFragmentExpression(
531 DIExpression::get(Assign->getContext(), {}), DeadFragment.OffsetInBits,
532 DeadFragment.SizeInBits);
533 Assign->setExpression(Expr);
534 Assign->setKillLocation();
535 };
536
537 // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
538 // link to any instructions. Created in the loop below (once).
539 DIAssignID *LinkToNothing = nullptr;
540 LLVMContext &Ctx = Inst->getContext();
541 auto GetDeadLink = [&Ctx, &LinkToNothing]() {
542 if (!LinkToNothing)
543 LinkToNothing = DIAssignID::getDistinct(Ctx);
544 return LinkToNothing;
545 };
546
547 // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
548 // overlapping dbg.assign intrinsic.
549 for (DbgVariableRecord *Assign : at::getDVRAssignmentMarkers(Inst)) {
550 std::optional<DIExpression::FragmentInfo> NewFragment;
551 if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
552 DeadSliceSizeInBits, Assign,
553 NewFragment) ||
554 !NewFragment) {
555 // We couldn't calculate the intersecting fragment for some reason. Be
556 // cautious and unlink the whole assignment from the store.
557 Assign->setKillAddress();
558 Assign->setAssignId(GetDeadLink());
559 continue;
560 }
561 // No intersect.
562 if (NewFragment->SizeInBits == 0)
563 continue;
564
565 // Fragments overlap: insert a new dbg.assign for this dead part.
566 auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
567 NewAssign->insertAfter(Assign->getIterator());
568 NewAssign->setAssignId(GetDeadLink());
569 if (NewFragment)
570 SetDeadFragExpr(NewAssign, *NewFragment);
571 NewAssign->setKillAddress();
572 }
573}
574
575/// Update the attributes given that a memory access is updated (the
576/// dereferenced pointer could be moved forward when shortening a
577/// mem intrinsic).
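///
/// For example (hypothetical attribute set, not from a test in this file): if
/// the argument carried `align 16, nonnull, dereferenceable(32)` and the start
/// of the access is moved forward by 8 bytes, only `nonnull` (and `noundef`)
/// survive; the alignment no longer holds for the offset pointer, and the
/// dereferenceable bytes are conservatively dropped rather than reduced.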
578static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo,
579 uint64_t PtrOffset) {
580 // Remember old attributes.
581 AttributeSet OldAttrs = Intrinsic->getParamAttributes(ArgNo);
582
583 // Find attributes that should be kept, and remove the rest.
584 AttributeMask AttrsToRemove;
585 for (auto &Attr : OldAttrs) {
586 if (Attr.hasKindAsEnum()) {
587 switch (Attr.getKindAsEnum()) {
588 default:
589 break;
590 case Attribute::Alignment:
591 // Only keep alignment if PtrOffset satisfies the alignment.
592 if (isAligned(Attr.getAlignment().valueOrOne(), PtrOffset))
593 continue;
594 break;
595 case Attribute::Dereferenceable:
596 case Attribute::DereferenceableOrNull:
597 // We could reduce the size of these attributes according to
598 // PtrOffset. But we simply drop these for now.
599 break;
600 case Attribute::NonNull:
601 case Attribute::NoUndef:
602 continue;
603 }
604 }
605 AttrsToRemove.addAttribute(Attr);
606 }
607
608 // Remove the attributes that should be dropped.
609 Intrinsic->removeParamAttrs(ArgNo, AttrsToRemove);
610}
611
612static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
613 uint64_t &DeadSize, int64_t KillingStart,
614 uint64_t KillingSize, bool IsOverwriteEnd) {
615 auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
616 Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
617
618 // We assume that memset/memcpy operates in chunks of the "largest" native
619 // type size, aligned on the same value. That means the optimal start and size
620 // of a memset/memcpy should be a multiple of the preferred alignment of that
621 // type, i.e. there is no sense in trying to reduce the store size any further
622 // since any "extra" stores come for free anyway.
623 // On the other hand, maximum alignment we can achieve is limited by alignment
624 // of initial store.
625
626 // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
627 // "largest" native type.
628 // Note: What is the proper way to get that value?
629 // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
630 // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
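 //
 // A sketch with assumed numbers (not from a test): for a memset of 32 bytes at
 // DeadStart = 0 with PrefAlign = 8, where the killing store overwrites
 // [20, 32), the removal start is rounded up to 24 so that the remaining memset
 // keeps a size (24) that is a multiple of the preferred alignment.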
631
632 int64_t ToRemoveStart = 0;
633 uint64_t ToRemoveSize = 0;
634 // Compute start and size of the region to remove. Make sure 'PrefAlign' is
635 // maintained on the remaining store.
636 if (IsOverwriteEnd) {
637 // Calculate required adjustment for 'KillingStart' in order to keep
638 // remaining store size aligned on 'PrefAlign'.
639 uint64_t Off =
640 offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
641 ToRemoveStart = KillingStart + Off;
642 if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
643 return false;
644 ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
645 } else {
646 ToRemoveStart = DeadStart;
647 assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
648 "Not overlapping accesses?");
649 ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
650 // Calculate required adjustment for 'ToRemoveSize' in order to keep
651 // start of the remaining store aligned on 'PrefAlign'.
652 uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
653 if (Off != 0) {
654 if (ToRemoveSize <= (PrefAlign.value() - Off))
655 return false;
656 ToRemoveSize -= PrefAlign.value() - Off;
657 }
658 assert(isAligned(PrefAlign, ToRemoveSize) &&
659 "Should preserve selected alignment");
660 }
661
662 assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
663 assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
664
665 uint64_t NewSize = DeadSize - ToRemoveSize;
666 if (DeadIntrinsic->isAtomic()) {
667 // When shortening an atomic memory intrinsic, the newly shortened
668 // length must remain an integer multiple of the element size.
669 const uint32_t ElementSize = DeadIntrinsic->getElementSizeInBytes();
670 if (0 != NewSize % ElementSize)
671 return false;
672 }
673
674 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
675 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
676 << "\n KILLER [" << ToRemoveStart << ", "
677 << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
678
679 DeadIntrinsic->setLength(NewSize);
680 DeadIntrinsic->setDestAlignment(PrefAlign);
681
682 Value *OrigDest = DeadIntrinsic->getRawDest();
683 if (!IsOverwriteEnd) {
684 Value *Indices[1] = {
685 ConstantInt::get(DeadIntrinsic->getLength()->getType(), ToRemoveSize)};
686 Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
687 Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "",
688 DeadI->getIterator());
689 NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
690 DeadIntrinsic->setDest(NewDestGEP);
691 adjustArgAttributes(DeadIntrinsic, 0, ToRemoveSize);
692 }
693
694 // Update attached dbg.assign intrinsics. Assume 8-bit byte.
695 shortenAssignment(DeadI, OrigDest, DeadStart * 8, DeadSize * 8, NewSize * 8,
696 IsOverwriteEnd);
697
698 // Finally update start and size of dead access.
699 if (!IsOverwriteEnd)
700 DeadStart += ToRemoveSize;
701 DeadSize = NewSize;
702
703 return true;
704}
705
706static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
707 int64_t &DeadStart, uint64_t &DeadSize) {
708 if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
709 return false;
710
711 OverlapIntervalsTy::iterator OII = --IntervalMap.end();
712 int64_t KillingStart = OII->second;
713 uint64_t KillingSize = OII->first - KillingStart;
714
715 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
716
717 if (KillingStart > DeadStart &&
718 // Note: "KillingStart - DeadStart" is known to be positive due to
719 // preceding check.
720 (uint64_t)(KillingStart - DeadStart) < DeadSize &&
721 // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
722 // be non negative due to preceding checks.
723 KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
724 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
725 true)) {
726 IntervalMap.erase(OII);
727 return true;
728 }
729 }
730 return false;
731}
732
733static bool tryToShortenBegin(Instruction *DeadI,
734 OverlapIntervalsTy &IntervalMap,
735 int64_t &DeadStart, uint64_t &DeadSize) {
736 if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
737 return false;
738
739 OverlapIntervalsTy::iterator OII = IntervalMap.begin();
740 int64_t KillingStart = OII->second;
741 uint64_t KillingSize = OII->first - KillingStart;
742
743 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
744
745 if (KillingStart <= DeadStart &&
746 // Note: "DeadStart - KillingStart" is known to be non negative due to
747 // preceding check.
748 KillingSize > (uint64_t)(DeadStart - KillingStart)) {
750 // Note: "KillingSize - (uint64_t)(DeadStart - KillingStart)" is known to
750 // be positive due to preceding checks.
751 assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
752 "Should have been handled as OW_Complete");
753 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
754 false)) {
755 IntervalMap.erase(OII);
756 return true;
757 }
758 }
759 return false;
760}
761
762static Constant *
763tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
764 int64_t KillingOffset, int64_t DeadOffset,
765 const DataLayout &DL, BatchAAResults &AA,
766 DominatorTree *DT) {
767
768 if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
769 DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
770 KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
771 DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
772 memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
773 // If the store we find is:
774 // a) partially overwritten by the store to 'Loc'
775 // b) the killing store is fully contained in the dead one and
776 // c) they both have a constant value
777 // d) none of the two stores need padding
778 // Merge the two stores, replacing the dead store's value with a
779 // merge of both values.
780 // TODO: Deal with other constant types (vectors, etc), and probably
781 // some mem intrinsics (if needed)
782
783 APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
784 APInt KillingValue =
785 cast<ConstantInt>(KillingI->getValueOperand())->getValue();
786 unsigned KillingBits = KillingValue.getBitWidth();
787 assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
788 KillingValue = KillingValue.zext(DeadValue.getBitWidth());
789
790 // Offset of the smaller store inside the larger store
791 unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
792 unsigned LShiftAmount =
793 DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
794 : BitOffsetDiff;
795 APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
796 LShiftAmount + KillingBits);
797 // Clear the bits we'll be replacing, then OR with the smaller
798 // store, shifted appropriately.
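 // Worked example with assumed values (little-endian): a dead i32 store of
 // 0x11223344 at offset 0 and a killing i8 store of 0xAB at offset 1 give
 // BitOffsetDiff = 8, LShiftAmount = 8, Mask = 0x0000FF00, and
 // Merged = 0x1122AB44.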
799 APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
800 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
801 << "\n Killing: " << *KillingI
802 << "\n Merged Value: " << Merged << '\n');
803 return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
804 }
805 return nullptr;
806}
807
808namespace {
809// Returns true if \p I is an intrinsic that does not read or write memory.
810bool isNoopIntrinsic(Instruction *I) {
811 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
812 switch (II->getIntrinsicID()) {
813 case Intrinsic::lifetime_start:
814 case Intrinsic::lifetime_end:
815 case Intrinsic::invariant_end:
816 case Intrinsic::launder_invariant_group:
817 case Intrinsic::assume:
818 return true;
819 case Intrinsic::dbg_declare:
820 case Intrinsic::dbg_label:
821 case Intrinsic::dbg_value:
822 llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
823 default:
824 return false;
825 }
826 }
827 return false;
828}
829
830// Check if we can ignore \p D for DSE.
831bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
832 Instruction *DI = D->getMemoryInst();
833 // Calls that only access inaccessible memory cannot read or write any memory
834 // locations we consider for elimination.
835 if (auto *CB = dyn_cast<CallBase>(DI))
836 if (CB->onlyAccessesInaccessibleMemory())
837 return true;
838
839 // We can eliminate stores to locations not visible to the caller across
840 // throwing instructions.
841 if (DI->mayThrow() && !DefVisibleToCaller)
842 return true;
843
844 // We can remove the dead stores, irrespective of the fence and its ordering
845 // (release/acquire/seq_cst). Fences only constrain the ordering of
846 // already visible stores, it does not make a store visible to other
847 // threads. So, skipping over a fence does not change a store from being
848 // dead.
849 if (isa<FenceInst>(DI))
850 return true;
851
852 // Skip intrinsics that do not really read or modify memory.
853 if (isNoopIntrinsic(DI))
854 return true;
855
856 return false;
857}
858
859// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
860// defined by `MemDef`.
861struct MemoryLocationWrapper {
862 MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
863 bool DefByInitializesAttr)
864 : MemLoc(MemLoc), MemDef(MemDef),
865 DefByInitializesAttr(DefByInitializesAttr) {
866 assert(MemLoc.Ptr && "MemLoc should not be null");
867 UnderlyingObject = getUnderlyingObject(MemLoc.Ptr);
868 DefInst = MemDef->getMemoryInst();
869 }
870
871 MemoryLocation MemLoc;
872 const Value *UnderlyingObject;
873 MemoryDef *MemDef;
874 Instruction *DefInst;
875 bool DefByInitializesAttr = false;
876};
877
878// A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
879// defined by this MemoryDef.
880struct MemoryDefWrapper {
881 MemoryDefWrapper(MemoryDef *MemDef,
882 ArrayRef<std::pair<MemoryLocation, bool>> MemLocations) {
883 DefInst = MemDef->getMemoryInst();
884 for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
885 DefinedLocations.push_back(
886 MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr));
887 }
888 Instruction *DefInst;
889 SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
890};
891
892bool hasInitializesAttr(Instruction *I) {
893 CallBase *CB = dyn_cast<CallBase>(I);
894 return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
895}
896
897struct ArgumentInitInfo {
898 unsigned Idx;
899 bool IsDeadOrInvisibleOnUnwind;
900 ConstantRangeList Inits;
901};
902
903// Return the intersected range list of the initializes attributes of "Args".
904// "Args" are call arguments that alias to each other.
905// If any argument in "Args" doesn't have dead_on_unwind attr and
906// "CallHasNoUnwindAttr" is false, return empty.
907ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
908 bool CallHasNoUnwindAttr) {
909 if (Args.empty())
910 return {};
911
912 // To address unwind, the function should have nounwind attribute or the
913 // arguments have dead or invisible on unwind. Otherwise, return empty.
914 for (const auto &Arg : Args) {
915 if (!CallHasNoUnwindAttr && !Arg.IsDeadOrInvisibleOnUnwind)
916 return {};
917 if (Arg.Inits.empty())
918 return {};
919 }
920
921 ConstantRangeList IntersectedIntervals = Args.front().Inits;
922 for (auto &Arg : Args.drop_front())
923 IntersectedIntervals = IntersectedIntervals.intersectWith(Arg.Inits);
924
925 return IntersectedIntervals;
926}
927
928struct DSEState {
929 Function &F;
930 AliasAnalysis &AA;
932
933 /// The single BatchAA instance that is used to cache AA queries. It will
934 /// not be invalidated over the whole run. This is safe, because:
935 /// 1. Only memory writes are removed, so the alias cache for memory
936 /// locations remains valid.
937 /// 2. No new instructions are added (only instructions removed), so cached
938 /// information for a deleted value cannot be accessed by a re-used new
939 /// value pointer.
940 BatchAAResults BatchAA;
941
942 MemorySSA &MSSA;
943 DominatorTree &DT;
944 PostDominatorTree &PDT;
945 const TargetLibraryInfo &TLI;
946 const DataLayout &DL;
947 const LoopInfo &LI;
948
949 // Whether the function contains any irreducible control flow, useful for
950 // being accurately able to detect loops.
951 bool ContainsIrreducibleLoops;
952
953 // All MemoryDefs that potentially could kill other MemDefs.
954 SmallVector<MemoryDef *, 64> MemDefs;
955 // Any that should be skipped as they are already deleted
957 // Keep track whether a given object is captured before return or not.
958 DenseMap<const Value *, bool> CapturedBeforeReturn;
959 // Keep track of all of the objects that are invisible to the caller after
960 // the function returns.
961 DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
962 // Keep track of blocks with throwing instructions not modeled in MemorySSA.
963 SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
964 // Post-order numbers for each basic block. Used to figure out if memory
965 // accesses are executed before another access.
966 DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
967
968 /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
969 /// basic block.
971 // Check if there are root nodes that are terminated by UnreachableInst.
972 // Those roots pessimize post-dominance queries. If there are such roots,
973 // fall back to CFG scan starting from all non-unreachable roots.
974 bool AnyUnreachableExit;
975
976 // Whether or not we should iterate on removing dead stores at the end of the
977 // function due to removing a store causing a previously captured pointer to
978 // no longer be captured.
979 bool ShouldIterateEndOfFunctionDSE;
980
981 /// Dead instructions to be removed at the end of DSE.
983
984 // Class contains self-reference, make sure it's not copied/moved.
985 DSEState(const DSEState &) = delete;
986 DSEState &operator=(const DSEState &) = delete;
987
988 DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
989 PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
990 const LoopInfo &LI)
991 : F(F), AA(AA), EA(DT, &LI), BatchAA(AA, &EA), MSSA(MSSA), DT(DT),
992 PDT(PDT), TLI(TLI), DL(F.getDataLayout()), LI(LI) {
993 // Collect blocks with throwing instructions not modeled in MemorySSA and
994 // alloc-like objects.
995 unsigned PO = 0;
996 for (BasicBlock *BB : post_order(&F)) {
997 PostOrderNumbers[BB] = PO++;
998 for (Instruction &I : *BB) {
999 MemoryAccess *MA = MSSA.getMemoryAccess(&I);
1000 if (I.mayThrow() && !MA)
1001 ThrowingBlocks.insert(I.getParent());
1002
1003 auto *MD = dyn_cast_or_null<MemoryDef>(MA);
1004 if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
1005 (getLocForWrite(&I) || isMemTerminatorInst(&I) ||
1006 (EnableInitializesImprovement && hasInitializesAttr(&I))))
1007 MemDefs.push_back(MD);
1008 }
1009 }
1010
1011 // Treat byval, inalloca or dead-on-return arguments the same as Allocas;
1012 // stores to them are dead at the end of the function.
1013 for (Argument &AI : F.args())
1014 if (AI.hasPassPointeeByValueCopyAttr() || AI.hasDeadOnReturnAttr())
1015 InvisibleToCallerAfterRet.insert({&AI, true});
1016
1017 // Collect whether there is any irreducible control flow in the function.
1018 ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
1019
1020 AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
1021 return isa<UnreachableInst>(E->getTerminator());
1022 });
1023 }
1024
1025 static void pushMemUses(MemoryAccess *Acc,
1028 for (Use &U : Acc->uses()) {
1029 auto *MA = cast<MemoryAccess>(U.getUser());
1030 if (Visited.insert(MA).second)
1031 WorkList.push_back(MA);
1032 }
1033 };
1034
1035 LocationSize strengthenLocationSize(const Instruction *I,
1036 LocationSize Size) const {
1037 if (auto *CB = dyn_cast<CallBase>(I)) {
1038 LibFunc F;
1039 if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
1040 (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
1041 // Use the precise location size specified by the 3rd argument
1042 // for determining whether KillingI overwrites DeadLoc if it is a memset_chk
1043 // instruction. memset_chk will write either the amount specified as 3rd
1044 // argument or the function will immediately abort and exit the program.
1045 // NOTE: AA may determine NoAlias if it can prove that the access size
1046 // is larger than the allocation size due to that being UB. To avoid
1047 // returning potentially invalid NoAlias results by AA, limit the use of
1048 // the precise location size to isOverwrite.
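 // E.g. (assumed call, not from a test): for
 // `__memset_chk(p, 0, 16, object_size)` the location size is strengthened to
 // a precise 16 bytes, taken from the third argument.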
1049 if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
1050 return LocationSize::precise(Len->getZExtValue());
1051 }
1052 }
1053 return Size;
1054 }
1055
1056 /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
1057 /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
1058 /// location (by \p DeadI instruction).
1059 /// Return OW_MaybePartial if \p KillingI does not completely overwrite
1060 /// \p DeadI, but they both write to the same underlying object. In that
1061 /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
1062 /// \p DeadI. Returns 'OR_None' if \p KillingI is known to not overwrite the
1063 /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
1064 OverwriteResult isOverwrite(const Instruction *KillingI,
1065 const Instruction *DeadI,
1066 const MemoryLocation &KillingLoc,
1067 const MemoryLocation &DeadLoc,
1068 int64_t &KillingOff, int64_t &DeadOff) {
1069 // AliasAnalysis does not always account for loops. Limit overwrite checks
1070 // to dependencies for which we can guarantee they are independent of any
1071 // loops they are in.
1072 if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
1073 return OW_Unknown;
1074
1075 LocationSize KillingLocSize =
1076 strengthenLocationSize(KillingI, KillingLoc.Size);
1077 const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
1078 const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
1079 const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
1080 const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
1081
1082 // Check whether the killing store overwrites the whole object, in which
1083 // case the size/offset of the dead store does not matter.
1084 if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
1085 isIdentifiedObject(KillingUndObj)) {
1086 std::optional<TypeSize> KillingUndObjSize =
1087 getPointerSize(KillingUndObj, DL, TLI, &F);
1088 if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
1089 return OW_Complete;
1090 }
1091
1092 // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
1093 // get imprecise values here, though (except for unknown sizes).
1094 if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
1095 // In case no constant size is known, try to use the IR values for the
1096 // number of bytes written and check if they match.
1097 const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
1098 const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
1099 if (KillingMemI && DeadMemI) {
1100 const Value *KillingV = KillingMemI->getLength();
1101 const Value *DeadV = DeadMemI->getLength();
1102 if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
1103 return OW_Complete;
1104 }
1105
1106 // Masked stores have imprecise locations, but we can reason about them
1107 // to some extent.
1108 return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
1109 }
1110
1111 const TypeSize KillingSize = KillingLocSize.getValue();
1112 const TypeSize DeadSize = DeadLoc.Size.getValue();
1113 // Bail on doing Size comparison which depends on AA for now
1114 // TODO: Remove AnyScalable once Alias Analysis deals with scalable vectors
1115 const bool AnyScalable =
1116 DeadSize.isScalable() || KillingLocSize.isScalable();
1117
1118 if (AnyScalable)
1119 return OW_Unknown;
1120 // Query the alias information
1121 AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
1122
1123 // If the start pointers are the same, we just have to compare sizes to see if
1124 // the killing store was larger than the dead store.
1125 if (AAR == AliasResult::MustAlias) {
1126 // Make sure that the KillingSize size is >= the DeadSize size.
1127 if (KillingSize >= DeadSize)
1128 return OW_Complete;
1129 }
1130
1131 // If we hit a partial alias we may have a full overwrite
1132 if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
1133 int32_t Off = AAR.getOffset();
1134 if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
1135 return OW_Complete;
1136 }
1137
1138 // If we can't resolve the same pointers to the same object, then we can't
1139 // analyze them at all.
1140 if (DeadUndObj != KillingUndObj) {
1141 // Non aliasing stores to different objects don't overlap. Note that
1142 // if the killing store is known to overwrite whole object (out of
1143 // bounds access overwrites whole object as well) then it is assumed to
1144 // completely overwrite any store to the same object even if they don't
1145 // actually alias (see next check).
1146 if (AAR == AliasResult::NoAlias)
1147 return OW_None;
1148 return OW_Unknown;
1149 }
1150
1151 // Okay, we have stores to two completely different pointers. Try to
1152 // decompose the pointer into a "base + constant_offset" form. If the base
1153 // pointers are equal, then we can reason about the two stores.
1154 DeadOff = 0;
1155 KillingOff = 0;
1156 const Value *DeadBasePtr =
1157 GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
1158 const Value *KillingBasePtr =
1159 GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
1160
1161 // If the base pointers still differ, we have two completely different
1162 // stores.
1163 if (DeadBasePtr != KillingBasePtr)
1164 return OW_Unknown;
1165
1166 // The killing access completely overlaps the dead store if and only if
1167 // both start and end of the dead one is "inside" the killing one:
1168 // |<->|--dead--|<->|
1169 // |-----killing------|
1170 // Accesses may overlap if and only if start of one of them is "inside"
1171 // another one:
1172 // |<->|--dead--|<-------->|
1173 // |-------killing--------|
1174 // OR
1175 // |-------dead-------|
1176 // |<->|---killing---|<----->|
1177 //
1178 // We have to be careful here as *Off is signed while *.Size is unsigned.
1179
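 // Worked example with assumed offsets: DeadOff = 4, DeadSize = 4,
 // KillingOff = 0, KillingSize = 8. DeadOff >= KillingOff and
 // (DeadOff - KillingOff) + DeadSize = 8 <= KillingSize, so the dead access
 // is completely overwritten (OW_Complete).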
1180 // Check if the dead access starts "not before" the killing one.
1181 if (DeadOff >= KillingOff) {
1182 // If the dead access ends "not after" the killing access then the
1183 // dead one is completely overwritten by the killing one.
1184 if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
1185 return OW_Complete;
1186 // If start of the dead access is "before" end of the killing access
1187 // then accesses overlap.
1188 else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
1189 return OW_MaybePartial;
1190 }
1191 // If start of the killing access is "before" end of the dead access then
1192 // accesses overlap.
1193 else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
1194 return OW_MaybePartial;
1195 }
1196
1197 // Can reach here only if accesses are known not to overlap.
1198 return OW_None;
1199 }
1200
1201 bool isInvisibleToCallerAfterRet(const Value *V) {
1202 if (isa<AllocaInst>(V))
1203 return true;
1204
1205 auto I = InvisibleToCallerAfterRet.insert({V, false});
1206 if (I.second && isInvisibleToCallerOnUnwind(V) && isNoAliasCall(V))
1207 I.first->second = capturesNothing(PointerMayBeCaptured(
1208 V, /*ReturnCaptures=*/true, CaptureComponents::Provenance));
1209 return I.first->second;
1210 }
1211
1212 bool isInvisibleToCallerOnUnwind(const Value *V) {
1213 bool RequiresNoCaptureBeforeUnwind;
1214 if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
1215 return false;
1216 if (!RequiresNoCaptureBeforeUnwind)
1217 return true;
1218
1219 auto I = CapturedBeforeReturn.insert({V, true});
1220 if (I.second)
1221 // NOTE: This could be made more precise by PointerMayBeCapturedBefore
1222 // with the killing MemoryDef. But we refrain from doing so for now to
1223 // limit compile-time and this does not cause any changes to the number
1224 // of stores removed on a large test set in practice.
1225 I.first->second = capturesAnything(PointerMayBeCaptured(
1226 V, /*ReturnCaptures=*/false, CaptureComponents::Provenance));
1227 return !I.first->second;
1228 }
1229
1230 std::optional<MemoryLocation> getLocForWrite(Instruction *I) const {
1231 if (!I->mayWriteToMemory())
1232 return std::nullopt;
1233
1234 if (auto *CB = dyn_cast<CallBase>(I))
1235 return MemoryLocation::getForDest(CB, TLI);
1236
1237 return MemoryLocation::getOrNone(I);
1238 }
1239
1240 // Returns a list of <MemoryLocation, bool> pairs written by I.
1241 // The bool means whether the write is from Initializes attr.
1242 SmallVector<std::pair<MemoryLocation, bool>, 1>
1243 getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
1244 SmallVector<std::pair<MemoryLocation, bool>, 1> Locations;
1245 if (isMemTerminatorInst(I)) {
1246 if (auto Loc = getLocForTerminator(I))
1247 Locations.push_back(std::make_pair(Loc->first, false));
1248 return Locations;
1249 }
1250
1251 if (auto Loc = getLocForWrite(I))
1252 Locations.push_back(std::make_pair(*Loc, false));
1253
1254 if (ConsiderInitializesAttr) {
1255 for (auto &MemLoc : getInitializesArgMemLoc(I)) {
1256 Locations.push_back(std::make_pair(MemLoc, true));
1257 }
1258 }
1259 return Locations;
1260 }
1261
1262 /// Assuming this instruction has a dead analyzable write, can we delete
1263 /// this instruction?
1264 bool isRemovable(Instruction *I) {
1265 assert(getLocForWrite(I) && "Must have analyzable write");
1266
1267 // Don't remove volatile/atomic stores.
1268 if (StoreInst *SI = dyn_cast<StoreInst>(I))
1269 return SI->isUnordered();
1270
1271 if (auto *CB = dyn_cast<CallBase>(I)) {
1272 // Don't remove volatile memory intrinsics.
1273 if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1274 return !MI->isVolatile();
1275
1276 // Never remove dead lifetime intrinsics, e.g. because they are followed
1277 // by a free.
1278 if (CB->isLifetimeStartOrEnd())
1279 return false;
1280
1281 return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1282 !CB->isTerminator();
1283 }
1284
1285 return false;
1286 }
1287
1288 /// Returns true if \p UseInst completely overwrites \p DefLoc
1289 /// (stored by \p DefInst).
1290 bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1291 Instruction *UseInst) {
1292 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1293 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1294 // MemoryDef.
1295 if (!UseInst->mayWriteToMemory())
1296 return false;
1297
1298 if (auto *CB = dyn_cast<CallBase>(UseInst))
1299 if (CB->onlyAccessesInaccessibleMemory())
1300 return false;
1301
1302 int64_t InstWriteOffset, DepWriteOffset;
1303 if (auto CC = getLocForWrite(UseInst))
1304 return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1305 DepWriteOffset) == OW_Complete;
1306 return false;
1307 }
1308
1309 /// Returns true if \p Def is not read before returning from the function.
1310 bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc) {
1311 LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1312 << *Def->getMemoryInst()
1313 << ") is at the end of the function\n");
1314 SmallVector<MemoryAccess *, 4> WorkList;
1315 SmallPtrSet<MemoryAccess *, 8> Visited;
1316
1317 pushMemUses(Def, WorkList, Visited);
1318 for (unsigned I = 0; I < WorkList.size(); I++) {
1319 if (WorkList.size() >= MemorySSAScanLimit) {
1320 LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1321 return false;
1322 }
1323
1324 MemoryAccess *UseAccess = WorkList[I];
1325 if (isa<MemoryPhi>(UseAccess)) {
1326 // AliasAnalysis does not account for loops. Limit elimination to
1327 // candidates for which we can guarantee they always store to the same
1328 // memory location.
1329 if (!isGuaranteedLoopInvariant(DefLoc.Ptr))
1330 return false;
1331
1332 pushMemUses(cast<MemoryPhi>(UseAccess), WorkList, Visited);
1333 continue;
1334 }
1335 // TODO: Checking for aliasing is expensive. Consider reducing the amount
1336 // of times this is called and/or caching it.
1337 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1338 if (isReadClobber(DefLoc, UseInst)) {
1339 LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1340 return false;
1341 }
1342
1343 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1344 pushMemUses(UseDef, WorkList, Visited);
1345 }
1346 return true;
1347 }
1348
1349 /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1350 /// pair with the MemoryLocation terminated by \p I and a boolean flag
1351 /// indicating whether \p I is a free-like call.
1352 std::optional<std::pair<MemoryLocation, bool>>
1353 getLocForTerminator(Instruction *I) const {
1354 if (auto *CB = dyn_cast<CallBase>(I)) {
1355 if (CB->getIntrinsicID() == Intrinsic::lifetime_end)
1356 return {
1357 std::make_pair(MemoryLocation::getForArgument(CB, 0, &TLI), false)};
1358 if (Value *FreedOp = getFreedOperand(CB, &TLI))
1359 return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
1360 }
1361
1362 return std::nullopt;
1363 }
1364
1365 /// Returns true if \p I is a memory terminator instruction like
1366 /// llvm.lifetime.end or free.
1367 bool isMemTerminatorInst(Instruction *I) const {
1368 auto *CB = dyn_cast<CallBase>(I);
1369 return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1370 getFreedOperand(CB, &TLI) != nullptr);
1371 }
1372
1373 /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1374 /// instruction \p AccessI.
1375 bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1376 Instruction *MaybeTerm) {
1377 std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1378 getLocForTerminator(MaybeTerm);
1379
1380 if (!MaybeTermLoc)
1381 return false;
1382
1383 // If the terminator is a free-like call, all accesses to the underlying
1384 // object can be considered terminated.
1385 if (getUnderlyingObject(Loc.Ptr) !=
1386 getUnderlyingObject(MaybeTermLoc->first.Ptr))
1387 return false;
1388
1389 auto TermLoc = MaybeTermLoc->first;
1390 if (MaybeTermLoc->second) {
1391 const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1392 return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1393 }
1394 int64_t InstWriteOffset = 0;
1395 int64_t DepWriteOffset = 0;
1396 return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1397 DepWriteOffset) == OW_Complete;
1398 }
1399
1400 // Returns true if \p Use may read from \p DefLoc.
1401 bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1402 if (isNoopIntrinsic(UseInst))
1403 return false;
1404
1405 // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1406 // treated as read clobber.
1407 if (auto SI = dyn_cast<StoreInst>(UseInst))
1408 return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1409
1410 if (!UseInst->mayReadFromMemory())
1411 return false;
1412
1413 if (auto *CB = dyn_cast<CallBase>(UseInst))
1414 if (CB->onlyAccessesInaccessibleMemory())
1415 return false;
1416
1417 return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1418 }
1419
1420 /// Returns true if a dependency between \p Current and \p KillingDef is
1421 /// guaranteed to be loop invariant for the loops that they are in. Either
1422 /// because they are known to be in the same block, in the same loop level or
1423 /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1424 /// during execution of the containing function.
1425 bool isGuaranteedLoopIndependent(const Instruction *Current,
1426 const Instruction *KillingDef,
1427 const MemoryLocation &CurrentLoc) {
1428 // If the dependency is within the same block or loop level (being careful
1429 // of irreducible loops), we know that AA will return a valid result for the
1430 // memory dependency. (Both at the function level, outside of any loop,
1431 // would also be valid but we currently disable that to limit compile time).
1432 if (Current->getParent() == KillingDef->getParent())
1433 return true;
1434 const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
1435 if (!ContainsIrreducibleLoops && CurrentLI &&
1436 CurrentLI == LI.getLoopFor(KillingDef->getParent()))
1437 return true;
1438 // Otherwise check the memory location is invariant to any loops.
1439 return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1440 }
1441
1442 /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1443 /// loop. In particular, this guarantees that it only references a single
1444 /// MemoryLocation during execution of the containing function.
1445 bool isGuaranteedLoopInvariant(const Value *Ptr) {
1446 Ptr = Ptr->stripPointerCasts();
1447 if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1448 if (GEP->hasAllConstantIndices())
1449 Ptr = GEP->getPointerOperand()->stripPointerCasts();
1450
1451 if (auto *I = dyn_cast<Instruction>(Ptr)) {
1452 return I->getParent()->isEntryBlock() ||
1453 (!ContainsIrreducibleLoops && !LI.getLoopFor(I->getParent()));
1454 }
1455 return true;
1456 }
1457
1458 // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1459 // with no read access between them or on any other path to a function exit
1460 // block if \p KillingLoc is not accessible after the function returns. If
1461 // there is no such MemoryDef, return std::nullopt. The returned value may not
1462 // (completely) overwrite \p KillingLoc. Currently we bail out when we
1463 // encounter an aliasing MemoryUse (read).
1464 std::optional<MemoryAccess *>
1465 getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1466 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1467 unsigned &ScanLimit, unsigned &WalkerStepLimit,
1468 bool IsMemTerm, unsigned &PartialLimit,
1469 bool IsInitializesAttrMemLoc) {
1470 if (ScanLimit == 0 || WalkerStepLimit == 0) {
1471 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1472 return std::nullopt;
1473 }
1474
1475 MemoryAccess *Current = StartAccess;
1476 Instruction *KillingI = KillingDef->getMemoryInst();
1477 LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1478
1479 // Only optimize defining access of KillingDef when directly starting at its
1480 // defining access. The defining access also must only access KillingLoc. At
1481 // the moment we only support instructions with a single write location, so
1482 // it should be sufficient to disable optimizations for instructions that
1483 // also read from memory.
1484 bool CanOptimize = OptimizeMemorySSA &&
1485 KillingDef->getDefiningAccess() == StartAccess &&
1486 !KillingI->mayReadFromMemory();
1487
1488 // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1489 std::optional<MemoryLocation> CurrentLoc;
1490 for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1491 LLVM_DEBUG({
1492 dbgs() << " visiting " << *Current;
1493 if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1494 dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1495 << ")";
1496 dbgs() << "\n";
1497 });
1498
1499 // Reached TOP.
1500 if (MSSA.isLiveOnEntryDef(Current)) {
1501 LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1502 if (CanOptimize && Current != KillingDef->getDefiningAccess())
1503 // The first clobbering def is... none.
1504 KillingDef->setOptimized(Current);
1505 return std::nullopt;
1506 }
1507
1508 // Cost of a step. Accesses in the same block are more likely to be valid
1509 // candidates for elimination, hence consider them cheaper.
1510 unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1511                             ? MemorySSASameBBStepCost
1512                             : MemorySSAOtherBBStepCost;
1513 if (WalkerStepLimit <= StepCost) {
1514 LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1515 return std::nullopt;
1516 }
1517 WalkerStepLimit -= StepCost;
1518
1519 // Return for MemoryPhis. They cannot be eliminated directly and the
1520 // caller is responsible for traversing them.
1521 if (isa<MemoryPhi>(Current)) {
1522 LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1523 return Current;
1524 }
1525
1526 // Below, check if CurrentDef is a valid candidate to be eliminated by
1527 // KillingDef. If it is not, check the next candidate.
1528 MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1529 Instruction *CurrentI = CurrentDef->getMemoryInst();
1530
1531 if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
1532 CanOptimize = false;
1533 continue;
1534 }
1535
1536 // Before we try to remove anything, check for any extra throwing
1537 // instructions that block us from DSEing
1538 if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1539 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1540 return std::nullopt;
1541 }
1542
1543 // Check for anything that looks like it will be a barrier to further
1544 // removal
1545 if (isDSEBarrier(KillingUndObj, CurrentI)) {
1546 LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1547 return std::nullopt;
1548 }
1549
1550       // If Current is known to be on a path that reads DefLoc or is a read
1551 // clobber, bail out, as the path is not profitable. We skip this check
1552 // for intrinsic calls, because the code knows how to handle memcpy
1553 // intrinsics.
1554 if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1555 return std::nullopt;
1556
1557 // Quick check if there are direct uses that are read-clobbers.
1558 if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1559 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1560 return !MSSA.dominates(StartAccess, UseOrDef) &&
1561 isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1562 return false;
1563 })) {
1564 LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1565 return std::nullopt;
1566 }
1567
1568 // If Current does not have an analyzable write location or is not
1569 // removable, skip it.
1570 CurrentLoc = getLocForWrite(CurrentI);
1571 if (!CurrentLoc || !isRemovable(CurrentI)) {
1572 CanOptimize = false;
1573 continue;
1574 }
1575
1576 // AliasAnalysis does not account for loops. Limit elimination to
1577 // candidates for which we can guarantee they always store to the same
1578       // memory location and are not located in different loops.
1579 if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1580 LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1581 CanOptimize = false;
1582 continue;
1583 }
1584
1585 if (IsMemTerm) {
1586 // If the killing def is a memory terminator (e.g. lifetime.end), check
1587         // the next candidate if Current does not write the same
1588 // underlying object as the terminator.
1589 if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1590 CanOptimize = false;
1591 continue;
1592 }
1593 } else {
1594 int64_t KillingOffset = 0;
1595 int64_t DeadOffset = 0;
1596 auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1597 KillingOffset, DeadOffset);
1598 if (CanOptimize) {
1599 // CurrentDef is the earliest write clobber of KillingDef. Use it as
1600 // optimized access. Do not optimize if CurrentDef is already the
1601 // defining access of KillingDef.
1602 if (CurrentDef != KillingDef->getDefiningAccess() &&
1603 (OR == OW_Complete || OR == OW_MaybePartial))
1604 KillingDef->setOptimized(CurrentDef);
1605
1606 // Once a may-aliasing def is encountered do not set an optimized
1607 // access.
1608 if (OR != OW_None)
1609 CanOptimize = false;
1610 }
1611
1612 // If Current does not write to the same object as KillingDef, check
1613 // the next candidate.
1614 if (OR == OW_Unknown || OR == OW_None)
1615 continue;
1616 else if (OR == OW_MaybePartial) {
1617 // If KillingDef only partially overwrites Current, check the next
1618 // candidate if the partial step limit is exceeded. This aggressively
1619 // limits the number of candidates for partial store elimination,
1620 // which are less likely to be removable in the end.
1621 if (PartialLimit <= 1) {
1622 WalkerStepLimit -= 1;
1623 LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
1624 continue;
1625 }
1626 PartialLimit -= 1;
1627 }
1628 }
1629 break;
1630 };
1631
1632 // Accesses to objects accessible after the function returns can only be
1633 // eliminated if the access is dead along all paths to the exit. Collect
1634 // the blocks with killing (=completely overwriting MemoryDefs) and check if
1635 // they cover all paths from MaybeDeadAccess to any function exit.
1636     SmallPtrSet<Instruction *, 16> KillingDefs;
1637 KillingDefs.insert(KillingDef->getMemoryInst());
1638 MemoryAccess *MaybeDeadAccess = Current;
1639 MemoryLocation MaybeDeadLoc = *CurrentLoc;
1640 Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1641 LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1642 << *MaybeDeadI << ")\n");
1643
1644     SmallVector<MemoryAccess *, 32> WorkList;
1645     SmallPtrSet<MemoryAccess *, 32> Visited;
1646 pushMemUses(MaybeDeadAccess, WorkList, Visited);
1647
1648 // Check if DeadDef may be read.
1649 for (unsigned I = 0; I < WorkList.size(); I++) {
1650 MemoryAccess *UseAccess = WorkList[I];
1651
1652 LLVM_DEBUG(dbgs() << " " << *UseAccess);
1653 // Bail out if the number of accesses to check exceeds the scan limit.
1654 if (ScanLimit < (WorkList.size() - I)) {
1655 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1656 return std::nullopt;
1657 }
1658 --ScanLimit;
1659 NumDomMemDefChecks++;
1660
1661 if (isa<MemoryPhi>(UseAccess)) {
1662 if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1663 return DT.properlyDominates(KI->getParent(),
1664 UseAccess->getBlock());
1665 })) {
1666 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1667 continue;
1668 }
1669 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1670 pushMemUses(UseAccess, WorkList, Visited);
1671 continue;
1672 }
1673
1674 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1675 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1676
1677 if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1678 return DT.dominates(KI, UseInst);
1679 })) {
1680 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1681 continue;
1682 }
1683
1684       // A memory terminator kills all preceding MemoryDefs and all succeeding
1685       // MemoryAccesses. We do not have to check its users.
1686 if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1687 LLVM_DEBUG(
1688 dbgs()
1689 << " ... skipping, memterminator invalidates following accesses\n");
1690 continue;
1691 }
1692
1693 if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1694 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1695 pushMemUses(UseAccess, WorkList, Visited);
1696 continue;
1697 }
1698
1699 if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
1700 LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1701 return std::nullopt;
1702 }
1703
1704 // Uses which may read the original MemoryDef mean we cannot eliminate the
1705 // original MD. Stop walk.
1706 // If KillingDef is a CallInst with "initializes" attribute, the reads in
1707 // the callee would be dominated by initializations, so it should be safe.
1708 bool IsKillingDefFromInitAttr = false;
1709 if (IsInitializesAttrMemLoc) {
1710 if (KillingI == UseInst &&
1711 KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr))
1712 IsKillingDefFromInitAttr = true;
1713 }
1714
1715 if (isReadClobber(MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
1716 LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1717 return std::nullopt;
1718 }
1719
1720 // If this worklist walks back to the original memory access (and the
1721       // pointer is not guaranteed loop invariant) then we cannot assume that a
1722 // store kills itself.
1723 if (MaybeDeadAccess == UseAccess &&
1724 !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1725 LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1726 return std::nullopt;
1727 }
1728 // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1729 // if it reads the memory location.
1730 // TODO: It would probably be better to check for self-reads before
1731 // calling the function.
1732 if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1733 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1734 continue;
1735 }
1736
1737 // Check all uses for MemoryDefs, except for defs completely overwriting
1738 // the original location. Otherwise we have to check uses of *all*
1739 // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
1740 // miss cases like the following
1741 // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1742 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1743 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1744 // (The Use points to the *first* Def it may alias)
1745 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1746 // stores [0,1]
1747 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1748 if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1749 BasicBlock *MaybeKillingBlock = UseInst->getParent();
1750 if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1751 PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1752 if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1753             LLVM_DEBUG(dbgs()
1754 << " ... found killing def " << *UseInst << "\n");
1755 KillingDefs.insert(UseInst);
1756 }
1757 } else {
1758             LLVM_DEBUG(dbgs()
1759                        << " ... found preceding def " << *UseInst << "\n");
1760 return std::nullopt;
1761 }
1762 } else
1763 pushMemUses(UseDef, WorkList, Visited);
1764 }
1765 }
1766
1767 // For accesses to locations visible after the function returns, make sure
1768 // that the location is dead (=overwritten) along all paths from
1769 // MaybeDeadAccess to the exit.
1770 if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1771 SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1772 for (Instruction *KD : KillingDefs)
1773 KillingBlocks.insert(KD->getParent());
1774 assert(!KillingBlocks.empty() &&
1775 "Expected at least a single killing block");
1776
1777 // Find the common post-dominator of all killing blocks.
1778 BasicBlock *CommonPred = *KillingBlocks.begin();
1779 for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1780 if (!CommonPred)
1781 break;
1782 CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1783 }
1784
1785 // If the common post-dominator does not post-dominate MaybeDeadAccess,
1786 // there is a path from MaybeDeadAccess to an exit not going through a
1787 // killing block.
1788 if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1789 if (!AnyUnreachableExit)
1790 return std::nullopt;
1791
1792 // Fall back to CFG scan starting at all non-unreachable roots if not
1793 // all paths to the exit go through CommonPred.
1794 CommonPred = nullptr;
1795 }
1796
1797 // If CommonPred itself is in the set of killing blocks, we're done.
1798 if (KillingBlocks.count(CommonPred))
1799 return {MaybeDeadAccess};
1800
1801 SetVector<BasicBlock *> WorkList;
1802 // If CommonPred is null, there are multiple exits from the function.
1803 // They all have to be added to the worklist.
1804 if (CommonPred)
1805 WorkList.insert(CommonPred);
1806 else
1807 for (BasicBlock *R : PDT.roots()) {
1808 if (!isa<UnreachableInst>(R->getTerminator()))
1809 WorkList.insert(R);
1810 }
1811
1812 NumCFGTries++;
1813 // Check if all paths starting from an exit node go through one of the
1814 // killing blocks before reaching MaybeDeadAccess.
1815 for (unsigned I = 0; I < WorkList.size(); I++) {
1816 NumCFGChecks++;
1817 BasicBlock *Current = WorkList[I];
1818 if (KillingBlocks.count(Current))
1819 continue;
1820 if (Current == MaybeDeadAccess->getBlock())
1821 return std::nullopt;
1822
1823 // MaybeDeadAccess is reachable from the entry, so we don't have to
1824 // explore unreachable blocks further.
1825 if (!DT.isReachableFromEntry(Current))
1826 continue;
1827
1828 WorkList.insert_range(predecessors(Current));
1829
1830 if (WorkList.size() >= MemorySSAPathCheckLimit)
1831 return std::nullopt;
1832 }
1833 NumCFGSuccess++;
1834 }
1835
1836 // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1837 // potentially dead.
1838 return {MaybeDeadAccess};
1839 }
1840
1841 /// Delete dead memory defs and recursively add their operands to ToRemove if
1842 /// they became dead.
1843 void
1844   deleteDeadInstruction(Instruction *SI,
1845                         SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr) {
1846 MemorySSAUpdater Updater(&MSSA);
1847 SmallVector<Instruction *, 32> NowDeadInsts;
1848 NowDeadInsts.push_back(SI);
1849 --NumFastOther;
1850
1851 while (!NowDeadInsts.empty()) {
1852 Instruction *DeadInst = NowDeadInsts.pop_back_val();
1853 ++NumFastOther;
1854
1855 // Try to preserve debug information attached to the dead instruction.
1856 salvageDebugInfo(*DeadInst);
1857 salvageKnowledge(DeadInst);
1858
1859 // Remove the Instruction from MSSA.
1860 MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
1861 bool IsMemDef = MA && isa<MemoryDef>(MA);
1862 if (MA) {
1863 if (IsMemDef) {
1864 auto *MD = cast<MemoryDef>(MA);
1865 SkipStores.insert(MD);
1866 if (Deleted)
1867 Deleted->insert(MD);
1868 if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
1869 if (SI->getValueOperand()->getType()->isPointerTy()) {
1870 const Value *UO = getUnderlyingObject(SI->getValueOperand());
1871 if (CapturedBeforeReturn.erase(UO))
1872 ShouldIterateEndOfFunctionDSE = true;
1873 InvisibleToCallerAfterRet.erase(UO);
1874 }
1875 }
1876 }
1877
1878 Updater.removeMemoryAccess(MA);
1879 }
1880
1881 auto I = IOLs.find(DeadInst->getParent());
1882 if (I != IOLs.end())
1883 I->second.erase(DeadInst);
1884 // Remove its operands
1885 for (Use &O : DeadInst->operands())
1886 if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1887 O.set(PoisonValue::get(O->getType()));
1888 if (isInstructionTriviallyDead(OpI, &TLI))
1889 NowDeadInsts.push_back(OpI);
1890 }
1891
1892 EA.removeInstruction(DeadInst);
1893 // Remove memory defs directly if they don't produce results, but only
1894 // queue other dead instructions for later removal. They may have been
1895 // used as memory locations that have been cached by BatchAA. Removing
1896     // them here may lead to newly created instructions being allocated at the
1897 // same address, yielding stale cache entries.
1898 if (IsMemDef && DeadInst->getType()->isVoidTy())
1899 DeadInst->eraseFromParent();
1900 else
1901 ToRemove.push_back(DeadInst);
1902 }
1903 }
1904
1905 // Check for any extra throws between \p KillingI and \p DeadI that block
1906   // DSE. This only checks extra maythrows (those that aren't MemoryDefs).
1907   // MemoryDefs that may throw are handled during the walk from one def to the
1908 // next.
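 // Illustrative example (editorial addition; @may_throw is a hypothetical
 // callee): in
 //   store i32 0, ptr @g
 //   call void @may_throw()
 //   store i32 1, ptr @g
 // the first store to the global @g is not dead, because the call may unwind
 // and the caller could still observe the value 0.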
1909 bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1910 const Value *KillingUndObj) {
1911     // First see if we can ignore it by using the fact that the killing store's
1912     // underlying object is an alloca/alloca-like object that is not visible to
1913     // the caller during execution of the function.
1914 if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
1915 return false;
1916
1917 if (KillingI->getParent() == DeadI->getParent())
1918 return ThrowingBlocks.count(KillingI->getParent());
1919 return !ThrowingBlocks.empty();
1920 }
1921
1922 // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1923 // instructions act as barriers:
1924 // * A memory instruction that may throw and \p KillingI accesses a non-stack
1925 // object.
1926   // * Atomic stores stronger than monotonic.
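 // Illustrative example (editorial note): a "store atomic ... seq_cst"
 // encountered while walking upwards must not be reordered or eliminated, so
 // the walk stops there instead of looking for further candidates past it.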
1927 bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
1928     // If DeadI may throw it acts as a barrier, unless the killing access is to
1929     // an alloca/alloca-like object that does not escape.
1930 if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
1931 return true;
1932
1933 // If DeadI is an atomic load/store stronger than monotonic, do not try to
1934 // eliminate/reorder it.
1935 if (DeadI->isAtomic()) {
1936 if (auto *LI = dyn_cast<LoadInst>(DeadI))
1937 return isStrongerThanMonotonic(LI->getOrdering());
1938 if (auto *SI = dyn_cast<StoreInst>(DeadI))
1939 return isStrongerThanMonotonic(SI->getOrdering());
1940 if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
1941 return isStrongerThanMonotonic(ARMW->getOrdering());
1942 if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
1943 return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
1944 isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
1945 llvm_unreachable("other instructions should be skipped in MemorySSA");
1946 }
1947 return false;
1948 }
1949
1950 /// Eliminate writes to objects that are not visible in the caller and are not
1951 /// accessed before returning from the function.
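 /// Illustrative example (editorial addition, not from a test case):
 ///   %a = alloca i32
 ///   ...
 ///   store i32 1, ptr %a
 ///   ret void
 /// If %a does not escape and is not read again before the return, the store
 /// is dead because the object ceases to exist when the function exits.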
1952 bool eliminateDeadWritesAtEndOfFunction() {
1953 bool MadeChange = false;
1954 LLVM_DEBUG(
1955 dbgs()
1956 << "Trying to eliminate MemoryDefs at the end of the function\n");
1957 do {
1958 ShouldIterateEndOfFunctionDSE = false;
1959 for (MemoryDef *Def : llvm::reverse(MemDefs)) {
1960 if (SkipStores.contains(Def))
1961 continue;
1962
1963 Instruction *DefI = Def->getMemoryInst();
1964 auto DefLoc = getLocForWrite(DefI);
1965 if (!DefLoc || !isRemovable(DefI)) {
1966 LLVM_DEBUG(dbgs() << " ... could not get location for write or "
1967 "instruction not removable.\n");
1968 continue;
1969 }
1970
1971 // NOTE: Currently eliminating writes at the end of a function is
1972 // limited to MemoryDefs with a single underlying object, to save
1973 // compile-time. In practice it appears the case with multiple
1974 // underlying objects is very uncommon. If it turns out to be important,
1975 // we can use getUnderlyingObjects here instead.
1976 const Value *UO = getUnderlyingObject(DefLoc->Ptr);
1977 if (!isInvisibleToCallerAfterRet(UO))
1978 continue;
1979
1980 if (isWriteAtEndOfFunction(Def, *DefLoc)) {
1981 // See through pointer-to-pointer bitcasts
1982 LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
1983 "of the function\n");
1984           deleteDeadInstruction(DefI);
1985 ++NumFastStores;
1986 MadeChange = true;
1987 }
1988 }
1989 } while (ShouldIterateEndOfFunctionDSE);
1990 return MadeChange;
1991 }
1992
1993 /// If we have a zero initializing memset following a call to malloc,
1994 /// try folding it into a call to calloc.
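 /// Illustrative sketch (editorial addition, not from a test case):
 ///   %p = call ptr @malloc(i64 %n)
 ///   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
 /// becomes "%p = call ptr @calloc(i64 1, i64 %n)" when the memset covers the
 /// whole allocation and nothing may write to it between the two calls.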
1995 bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
1996 Instruction *DefI = Def->getMemoryInst();
1997 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1998 if (!MemSet)
1999 // TODO: Could handle zero store to small allocation as well.
2000 return false;
2001 Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
2002 if (!StoredConstant || !StoredConstant->isNullValue())
2003 return false;
2004
2005 if (!isRemovable(DefI))
2006       // The memset might be volatile.
2007 return false;
2008
2009 if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
2010 F.hasFnAttribute(Attribute::SanitizeAddress) ||
2011 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
2012 F.getName() == "calloc")
2013 return false;
2014 auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
2015 if (!Malloc)
2016 return false;
2017 auto *InnerCallee = Malloc->getCalledFunction();
2018 if (!InnerCallee)
2019 return false;
2020     LibFunc Func;
2021 StringRef ZeroedVariantName;
2022 if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
2023 Func != LibFunc_malloc) {
2024 Attribute Attr = Malloc->getFnAttr("alloc-variant-zeroed");
2025 if (!Attr.isValid())
2026 return false;
2027 ZeroedVariantName = Attr.getValueAsString();
2028 if (ZeroedVariantName.empty())
2029 return false;
2030 }
2031
2032 // Gracefully handle malloc with unexpected memory attributes.
2033 auto *MallocDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(Malloc));
2034 if (!MallocDef)
2035 return false;
2036
2037 auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
2038       // Check for a "br (icmp eq ptr, null), truebb, falsebb" pattern at the
2039       // end of the malloc block.
2040 auto *MallocBB = Malloc->getParent(),
2041 *MemsetBB = Memset->getParent();
2042 if (MallocBB == MemsetBB)
2043 return true;
2044 auto *Ptr = Memset->getArgOperand(0);
2045 auto *TI = MallocBB->getTerminator();
2046 BasicBlock *TrueBB, *FalseBB;
2047 if (!match(TI, m_Br(m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(Ptr),
2048 m_Zero()),
2049 TrueBB, FalseBB)))
2050 return false;
2051 if (MemsetBB != FalseBB)
2052 return false;
2053 return true;
2054 };
2055
2056 if (Malloc->getOperand(0) != MemSet->getLength())
2057 return false;
2058 if (!shouldCreateCalloc(Malloc, MemSet) || !DT.dominates(Malloc, MemSet) ||
2059 !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
2060 return false;
2061 IRBuilder<> IRB(Malloc);
2062 assert(Func == LibFunc_malloc || !ZeroedVariantName.empty());
2063 Value *Calloc = nullptr;
2064 if (!ZeroedVariantName.empty()) {
2065 LLVMContext &Ctx = Malloc->getContext();
2066 AttributeList Attrs = InnerCallee->getAttributes();
2067 AllocFnKind AllocKind =
2068 Attrs.getFnAttr(Attribute::AllocKind).getAllocKind() |
2069 AllocFnKind::Zeroed;
2070 AllocKind &= ~AllocFnKind::Uninitialized;
2071 Attrs =
2072 Attrs.addFnAttribute(Ctx, Attribute::getWithAllocKind(Ctx, AllocKind))
2073 .removeFnAttribute(Ctx, "alloc-variant-zeroed");
2074 FunctionCallee ZeroedVariant = Malloc->getModule()->getOrInsertFunction(
2075 ZeroedVariantName, InnerCallee->getFunctionType(), Attrs);
2076       SmallVector<Value *, 3> Args;
2077 Args.append(Malloc->arg_begin(), Malloc->arg_end());
2078 Calloc = IRB.CreateCall(ZeroedVariant, Args, ZeroedVariantName);
2079 } else {
2080 Type *SizeTTy = Malloc->getArgOperand(0)->getType();
2081 Calloc =
2082 emitCalloc(ConstantInt::get(SizeTTy, 1), Malloc->getArgOperand(0),
2083 IRB, TLI, Malloc->getType()->getPointerAddressSpace());
2084 }
2085 if (!Calloc)
2086 return false;
2087
2088 MemorySSAUpdater Updater(&MSSA);
2089 auto *NewAccess =
2090 Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), nullptr,
2091 MallocDef);
2092 auto *NewAccessMD = cast<MemoryDef>(NewAccess);
2093 Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
2094 Malloc->replaceAllUsesWith(Calloc);
2095     deleteDeadInstruction(Malloc);
2096 return true;
2097 }
2098
2099   // Check if there is a dominating condition that implies that the value
2100   // being stored to a pointer is already present at that location.
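 // Illustrative example (editorial addition, C-like pseudocode):
 //   if (*ptr == val) {
 //     ...
 //     *ptr = val;   // stores a value the guard already proves is present
 //   }
 // The store can be dropped when the load feeding the comparison is not
 // clobbered between the branch and the store.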
2101 bool dominatingConditionImpliesValue(MemoryDef *Def) {
2102 auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
2103 BasicBlock *StoreBB = StoreI->getParent();
2104 Value *StorePtr = StoreI->getPointerOperand();
2105 Value *StoreVal = StoreI->getValueOperand();
2106
2107 DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom();
2108 if (!IDom)
2109 return false;
2110
2111 auto *BI = dyn_cast<BranchInst>(IDom->getBlock()->getTerminator());
2112 if (!BI || !BI->isConditional())
2113 return false;
2114
2115 // In case both blocks are the same, it is not possible to determine
2116 // if optimization is possible. (We would not want to optimize a store
2117 // in the FalseBB if condition is true and vice versa.)
2118 if (BI->getSuccessor(0) == BI->getSuccessor(1))
2119 return false;
2120
2121 Instruction *ICmpL;
2122 CmpPredicate Pred;
2123 if (!match(BI->getCondition(),
2124 m_c_ICmp(Pred,
2125 m_CombineAnd(m_Load(m_Specific(StorePtr)),
2126 m_Instruction(ICmpL)),
2127 m_Specific(StoreVal))) ||
2128 !ICmpInst::isEquality(Pred))
2129 return false;
2130
2131     // In case the else block also branches to the if block, or the other way
2132     // around, it is not possible to determine if the optimization is possible.
2133 if (Pred == ICmpInst::ICMP_EQ &&
2134 !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)),
2135 StoreBB))
2136 return false;
2137
2138 if (Pred == ICmpInst::ICMP_NE &&
2139 !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)),
2140 StoreBB))
2141 return false;
2142
2143 MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
2144 MemoryAccess *ClobAcc =
2145 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);
2146
2147 return MSSA.dominates(ClobAcc, LoadAcc);
2148 }
2149
2150 /// \returns true if \p Def is a no-op store, either because it
2151 /// directly stores back a loaded value or stores zero to a calloced object.
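 /// Illustrative example (editorial note): "%v = load i32, ptr %p" followed
 /// by "store i32 %v, ptr %p" with no intervening clobber of %p is a no-op,
 /// as is storing zero into memory freshly returned by calloc.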
2152 bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
2153 Instruction *DefI = Def->getMemoryInst();
2154 StoreInst *Store = dyn_cast<StoreInst>(DefI);
2155 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
2156 Constant *StoredConstant = nullptr;
2157 if (Store)
2158 StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
2159 else if (MemSet)
2160 StoredConstant = dyn_cast<Constant>(MemSet->getValue());
2161 else
2162 return false;
2163
2164 if (!isRemovable(DefI))
2165 return false;
2166
2167 if (StoredConstant) {
2168 Constant *InitC =
2169 getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
2170 // If the clobbering access is LiveOnEntry, no instructions between them
2171 // can modify the memory location.
2172 if (InitC && InitC == StoredConstant)
2173 return MSSA.isLiveOnEntryDef(
2174 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
2175 }
2176
2177 if (!Store)
2178 return false;
2179
2180 if (dominatingConditionImpliesValue(Def))
2181 return true;
2182
2183 if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
2184 if (LoadI->getPointerOperand() == Store->getOperand(1)) {
2185 // Get the defining access for the load.
2186 auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
2187 // Fast path: the defining accesses are the same.
2188 if (LoadAccess == Def->getDefiningAccess())
2189 return true;
2190
2191 // Look through phi accesses. Recursively scan all phi accesses by
2192 // adding them to a worklist. Bail when we run into a memory def that
2193 // does not match LoadAccess.
2194         SetVector<MemoryAccess *> ToCheck;
2195 MemoryAccess *Current =
2196 MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
2197 // We don't want to bail when we run into the store memory def. But,
2198 // the phi access may point to it. So, pretend like we've already
2199 // checked it.
2200 ToCheck.insert(Def);
2201 ToCheck.insert(Current);
2202 // Start at current (1) to simulate already having checked Def.
2203 for (unsigned I = 1; I < ToCheck.size(); ++I) {
2204 Current = ToCheck[I];
2205 if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
2206 // Check all the operands.
2207 for (auto &Use : PhiAccess->incoming_values())
2208 ToCheck.insert(cast<MemoryAccess>(&Use));
2209 continue;
2210 }
2211
2212 // If we found a memory def, bail. This happens when we have an
2213 // unrelated write in between an otherwise noop store.
2214 assert(isa<MemoryDef>(Current) &&
2215 "Only MemoryDefs should reach here.");
2216 // TODO: Skip no alias MemoryDefs that have no aliasing reads.
2217 // We are searching for the definition of the store's destination.
2218 // So, if that is the same definition as the load, then this is a
2219 // noop. Otherwise, fail.
2220 if (LoadAccess != Current)
2221 return false;
2222 }
2223 return true;
2224 }
2225 }
2226
2227 return false;
2228 }
2229
2230 bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2231 bool Changed = false;
2232 for (auto OI : IOL) {
2233 Instruction *DeadI = OI.first;
2234 MemoryLocation Loc = *getLocForWrite(DeadI);
2235 assert(isRemovable(DeadI) && "Expect only removable instruction");
2236
2237 const Value *Ptr = Loc.Ptr->stripPointerCasts();
2238 int64_t DeadStart = 0;
2239 uint64_t DeadSize = Loc.Size.getValue();
2240 GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
2241 OverlapIntervalsTy &IntervalMap = OI.second;
2242 Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2243 if (IntervalMap.empty())
2244 continue;
2245 Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2246 }
2247 return Changed;
2248 }
2249
2250 /// Eliminates writes to locations where the value that is being written
2251 /// is already stored at the same location.
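 /// Illustrative sketch (editorial addition, not from a test case):
 ///   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 false)
 ///   store i32 0, ptr %p
 /// The later store writes bytes the memset already wrote, so it is redundant
 /// provided nothing may clobber %p between the two.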
2252 bool eliminateRedundantStoresOfExistingValues() {
2253 bool MadeChange = false;
2254 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
2255 "already existing value\n");
2256 for (auto *Def : MemDefs) {
2257 if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
2258 continue;
2259
2260 Instruction *DefInst = Def->getMemoryInst();
2261 auto MaybeDefLoc = getLocForWrite(DefInst);
2262 if (!MaybeDefLoc || !isRemovable(DefInst))
2263 continue;
2264
2265 MemoryDef *UpperDef;
2266 // To conserve compile-time, we avoid walking to the next clobbering def.
2267 // Instead, we just try to get the optimized access, if it exists. DSE
2268 // will try to optimize defs during the earlier traversal.
2269 if (Def->isOptimized())
2270 UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
2271 else
2272 UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
2273 if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
2274 continue;
2275
2276 Instruction *UpperInst = UpperDef->getMemoryInst();
2277 auto IsRedundantStore = [&]() {
2278 // We don't care about differences in call attributes here.
2279 if (DefInst->isIdenticalToWhenDefined(UpperInst,
2280 /*IntersectAttrs=*/true))
2281 return true;
2282 if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
2283 if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
2284 // MemSetInst must have a write location.
2285 auto UpperLoc = getLocForWrite(UpperInst);
2286 if (!UpperLoc)
2287 return false;
2288 int64_t InstWriteOffset = 0;
2289 int64_t DepWriteOffset = 0;
2290 auto OR = isOverwrite(UpperInst, DefInst, *UpperLoc, *MaybeDefLoc,
2291 InstWriteOffset, DepWriteOffset);
2292 Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
2293 return StoredByte && StoredByte == MemSetI->getOperand(1) &&
2294 OR == OW_Complete;
2295 }
2296 }
2297 return false;
2298 };
2299
2300 if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
2301 continue;
2302 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2303 << '\n');
2304 deleteDeadInstruction(DefInst);
2305 NumRedundantStores++;
2306 MadeChange = true;
2307 }
2308 return MadeChange;
2309 }
2310
2311 // Return the locations written by the initializes attribute.
2312 // Note that this function considers:
2313 // 1. Unwind edge: use "initializes" attribute only if the callee has
2314 // "nounwind" attribute, or the argument has "dead_on_unwind" attribute,
2315 // or the argument is invisible to caller on unwind. That is, we don't
2316 // perform incorrect DSE on unwind edges in the current function.
2317 // 2. Argument alias: for aliasing arguments, the "initializes" attribute is
2318 // the intersected range list of their "initializes" attributes.
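 // Illustrative example (editorial addition; @init is a hypothetical callee):
 // for a call such as
 //   call void @init(ptr initializes((0, 8)) %p)
 // where @init is nounwind, the call is treated as defining bytes [0, 8) of
 // %p, so earlier stores that lie entirely within that range may be dead.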
2319 SmallVector<MemoryLocation, 1> getInitializesArgMemLoc(const Instruction *I);
2320
2321 // Try to eliminate dead defs that access `KillingLocWrapper.MemLoc` and are
2322 // killed by `KillingLocWrapper.MemDef`. Return whether
2323 // any changes were made, and whether `KillingLocWrapper.DefInst` was deleted.
2324 std::pair<bool, bool>
2325 eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper);
2326
2327   // Try to eliminate dead defs killed by `KillingDefWrapper`. Return whether
2328   // any change was made.
2329 bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
2330};
2331
2332// Return true if "Arg" is function local and isn't captured before "CB".
2333bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
2335 const Value *UnderlyingObj = getUnderlyingObject(Arg);
2336 return isIdentifiedFunctionLocal(UnderlyingObj) &&
2338 EA.getCapturesBefore(UnderlyingObj, CB, /*OrAt*/ true));
2339}
2340
2341SmallVector<MemoryLocation, 1>
2342DSEState::getInitializesArgMemLoc(const Instruction *I) {
2343 const CallBase *CB = dyn_cast<CallBase>(I);
2344 if (!CB)
2345 return {};
2346
2347 // Collect aliasing arguments and their initializes ranges.
2348   MapVector<Value *, SmallVector<ArgumentInitInfo, 2>> Arguments;
2349 for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
2350 Value *CurArg = CB->getArgOperand(Idx);
2351 if (!CurArg->getType()->isPointerTy())
2352 continue;
2353
2354 ConstantRangeList Inits;
2355 Attribute InitializesAttr = CB->getParamAttr(Idx, Attribute::Initializes);
2356 // initializes on byval arguments refers to the callee copy, not the
2357 // original memory the caller passed in.
2358 if (InitializesAttr.isValid() && !CB->isByValArgument(Idx))
2359 Inits = InitializesAttr.getValueAsConstantRangeList();
2360
2361     // Check whether "CurArg" could alias with global variables. We require
2362     // either that it is function-local and isn't captured before the call, or
2363     // that "CB" only accesses argument or inaccessible memory.
2364 if (!Inits.empty() && !CB->onlyAccessesInaccessibleMemOrArgMem() &&
2365 !isFuncLocalAndNotCaptured(CurArg, CB, EA))
2366 Inits = ConstantRangeList();
2367
2368 // We don't perform incorrect DSE on unwind edges in the current function,
2369 // and use the "initializes" attribute to kill dead stores if:
2370 // - The call does not throw exceptions, "CB->doesNotThrow()".
2371 // - Or the callee parameter has "dead_on_unwind" attribute.
2372 // - Or the argument is invisible to caller on unwind, and there are no
2373 // unwind edges from this call in the current function (e.g. `CallInst`).
2374 bool IsDeadOrInvisibleOnUnwind =
2375 CB->paramHasAttr(Idx, Attribute::DeadOnUnwind) ||
2376 (isa<CallInst>(CB) && isInvisibleToCallerOnUnwind(CurArg));
2377 ArgumentInitInfo InitInfo{Idx, IsDeadOrInvisibleOnUnwind, Inits};
2378 bool FoundAliasing = false;
2379 for (auto &[Arg, AliasList] : Arguments) {
2380 auto AAR = BatchAA.alias(MemoryLocation::getBeforeOrAfter(Arg),
2381                                MemoryLocation::getBeforeOrAfter(CurArg));
2382 if (AAR == AliasResult::NoAlias) {
2383 continue;
2384 } else if (AAR == AliasResult::MustAlias) {
2385 FoundAliasing = true;
2386 AliasList.push_back(InitInfo);
2387 } else {
2388 // For PartialAlias and MayAlias, there is an offset or may be an
2389 // unknown offset between the arguments and we insert an empty init
2390 // range to discard the entire initializes info while intersecting.
2391 FoundAliasing = true;
2392 AliasList.push_back(ArgumentInitInfo{Idx, IsDeadOrInvisibleOnUnwind,
2393                                              ConstantRangeList()});
2394 }
2395 }
2396 if (!FoundAliasing)
2397 Arguments[CurArg] = {InitInfo};
2398 }
2399
2400   SmallVector<MemoryLocation, 1> Locations;
2401 for (const auto &[_, Args] : Arguments) {
2402 auto IntersectedRanges =
2403 getIntersectedInitRangeList(Args, CB->doesNotThrow());
2404 if (IntersectedRanges.empty())
2405 continue;
2406
2407 for (const auto &Arg : Args) {
2408 for (const auto &Range : IntersectedRanges) {
2409 int64_t Start = Range.getLower().getSExtValue();
2410 int64_t End = Range.getUpper().getSExtValue();
2411 // For now, we only handle locations starting at offset 0.
2412 if (Start == 0)
2413 Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx),
2414 LocationSize::precise(End - Start),
2415 CB->getAAMetadata()));
2416 }
2417 }
2418 }
2419 return Locations;
2420}
2421
2422std::pair<bool, bool>
2423DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
2424 bool Changed = false;
2425 bool DeletedKillingLoc = false;
2426 unsigned ScanLimit = MemorySSAScanLimit;
2427 unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2428 unsigned PartialLimit = MemorySSAPartialStoreLimit;
2429 // Worklist of MemoryAccesses that may be killed by
2430 // "KillingLocWrapper.MemDef".
2431   SetVector<MemoryAccess *> ToCheck;
2432 // Track MemoryAccesses that have been deleted in the loop below, so we can
2433 // skip them. Don't use SkipStores for this, which may contain reused
2434 // MemoryAccess addresses.
2435   SmallPtrSet<MemoryAccess *, 8> Deleted;
2436 [[maybe_unused]] unsigned OrigNumSkipStores = SkipStores.size();
2437 ToCheck.insert(KillingLocWrapper.MemDef->getDefiningAccess());
2438
2439 // Check if MemoryAccesses in the worklist are killed by
2440 // "KillingLocWrapper.MemDef".
2441 for (unsigned I = 0; I < ToCheck.size(); I++) {
2442 MemoryAccess *Current = ToCheck[I];
2443 if (Deleted.contains(Current))
2444 continue;
2445 std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
2446 KillingLocWrapper.MemDef, Current, KillingLocWrapper.MemLoc,
2447 KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
2448 isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit,
2449 KillingLocWrapper.DefByInitializesAttr);
2450
2451 if (!MaybeDeadAccess) {
2452 LLVM_DEBUG(dbgs() << " finished walk\n");
2453 continue;
2454 }
2455 MemoryAccess *DeadAccess = *MaybeDeadAccess;
2456 LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2457 if (isa<MemoryPhi>(DeadAccess)) {
2458 LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2459 for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
2460 MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
2461 BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2462 BasicBlock *PhiBlock = DeadAccess->getBlock();
2463
2464 // We only consider incoming MemoryAccesses that come before the
2465 // MemoryPhi. Otherwise we could discover candidates that do not
2466 // strictly dominate our starting def.
2467 if (PostOrderNumbers[IncomingBlock] > PostOrderNumbers[PhiBlock])
2468 ToCheck.insert(IncomingAccess);
2469 }
2470 continue;
2471 }
2472 // We cannot apply the initializes attribute to DeadAccess/DeadDef.
2473     // It would incorrectly consider a call instruction as a redundant store
2474 // and remove this call instruction.
2475 // TODO: this conflates the existence of a MemoryLocation with being able
2476 // to delete the instruction. Fix isRemovable() to consider calls with
2477 // side effects that cannot be removed, e.g. calls with the initializes
2478 // attribute, and remove getLocForInst(ConsiderInitializesAttr = false).
2479 MemoryDefWrapper DeadDefWrapper(
2480 cast<MemoryDef>(DeadAccess),
2481 getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
2482 /*ConsiderInitializesAttr=*/false));
2483 assert(DeadDefWrapper.DefinedLocations.size() == 1);
2484 MemoryLocationWrapper &DeadLocWrapper =
2485 DeadDefWrapper.DefinedLocations.front();
2486 LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
2487 ToCheck.insert(DeadLocWrapper.MemDef->getDefiningAccess());
2488 NumGetDomMemoryDefPassed++;
2489
2490 if (!DebugCounter::shouldExecute(MemorySSACounter))
2491 continue;
2492 if (isMemTerminatorInst(KillingLocWrapper.DefInst)) {
2493 if (KillingLocWrapper.UnderlyingObject != DeadLocWrapper.UnderlyingObject)
2494 continue;
2495 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2496 << *DeadLocWrapper.DefInst << "\n KILLER: "
2497 << *KillingLocWrapper.DefInst << '\n');
2498 deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
2499 ++NumFastStores;
2500 Changed = true;
2501 } else {
2502 // Check if DeadI overwrites KillingI.
2503 int64_t KillingOffset = 0;
2504 int64_t DeadOffset = 0;
2505 OverwriteResult OR =
2506 isOverwrite(KillingLocWrapper.DefInst, DeadLocWrapper.DefInst,
2507 KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
2508 KillingOffset, DeadOffset);
2509 if (OR == OW_MaybePartial) {
2510 auto &IOL = IOLs[DeadLocWrapper.DefInst->getParent()];
2511 OR = isPartialOverwrite(KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
2512 KillingOffset, DeadOffset,
2513 DeadLocWrapper.DefInst, IOL);
2514 }
2515 if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2516 auto *DeadSI = dyn_cast<StoreInst>(DeadLocWrapper.DefInst);
2517 auto *KillingSI = dyn_cast<StoreInst>(KillingLocWrapper.DefInst);
2518 // We are re-using tryToMergePartialOverlappingStores, which requires
2519 // DeadSI to dominate KillingSI.
2520         // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
2521 if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
2522           if (Constant *Merged = tryToMergePartialOverlappingStores(
2523 KillingSI, DeadSI, KillingOffset, DeadOffset, DL, BatchAA,
2524 &DT)) {
2525
2526 // Update stored value of earlier store to merged constant.
2527 DeadSI->setOperand(0, Merged);
2528 ++NumModifiedStores;
2529 Changed = true;
2530 DeletedKillingLoc = true;
2531
2532 // Remove killing store and remove any outstanding overlap
2533 // intervals for the updated store.
2534 deleteDeadInstruction(KillingSI, &Deleted);
2535 auto I = IOLs.find(DeadSI->getParent());
2536 if (I != IOLs.end())
2537 I->second.erase(DeadSI);
2538 break;
2539 }
2540 }
2541 }
2542 if (OR == OW_Complete) {
2543 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2544 << *DeadLocWrapper.DefInst << "\n KILLER: "
2545 << *KillingLocWrapper.DefInst << '\n');
2546 deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
2547 ++NumFastStores;
2548 Changed = true;
2549 }
2550 }
2551 }
2552
2553 assert(SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
2554 "SkipStores and Deleted out of sync?");
2555
2556 return {Changed, DeletedKillingLoc};
2557}
2558
2559bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
2560 if (KillingDefWrapper.DefinedLocations.empty()) {
2561 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2562 << *KillingDefWrapper.DefInst << "\n");
2563 return false;
2564 }
2565
2566 bool MadeChange = false;
2567 for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
2568 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2569 << *KillingLocWrapper.MemDef << " ("
2570 << *KillingLocWrapper.DefInst << ")\n");
2571 auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
2572 MadeChange |= Changed;
2573
2574 // Check if the store is a no-op.
2575 if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
2576 KillingLocWrapper.UnderlyingObject)) {
2577 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: "
2578 << *KillingLocWrapper.DefInst << '\n');
2579 deleteDeadInstruction(KillingLocWrapper.DefInst);
2580 NumRedundantStores++;
2581 MadeChange = true;
2582 continue;
2583 }
2584 // Can we form a calloc from a memset/malloc pair?
2585 if (!DeletedKillingLoc &&
2586 tryFoldIntoCalloc(KillingLocWrapper.MemDef,
2587 KillingLocWrapper.UnderlyingObject)) {
2588 LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2589 << " DEAD: " << *KillingLocWrapper.DefInst << '\n');
2590 deleteDeadInstruction(KillingLocWrapper.DefInst);
2591 MadeChange = true;
2592 continue;
2593 }
2594 }
2595 return MadeChange;
2596}
2597
2598static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
2599                                 DominatorTree &DT, PostDominatorTree &PDT,
2600 const TargetLibraryInfo &TLI,
2601 const LoopInfo &LI) {
2602 bool MadeChange = false;
2603 DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
2604 // For each store:
2605 for (unsigned I = 0; I < State.MemDefs.size(); I++) {
2606 MemoryDef *KillingDef = State.MemDefs[I];
2607 if (State.SkipStores.count(KillingDef))
2608 continue;
2609
2610 MemoryDefWrapper KillingDefWrapper(
2611 KillingDef, State.getLocForInst(KillingDef->getMemoryInst(),
2612                                     EnableInitializesImprovement));
2613 MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
2614 }
2615
2616   if (EnablePartialOverwriteTracking)
2617 for (auto &KV : State.IOLs)
2618 MadeChange |= State.removePartiallyOverlappedStores(KV.second);
2619
2620 MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2621 MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2622
2623 while (!State.ToRemove.empty()) {
2624 Instruction *DeadInst = State.ToRemove.pop_back_val();
2625 DeadInst->eraseFromParent();
2626 }
2627
2628 return MadeChange;
2629}
2630} // end anonymous namespace
2631
2632//===----------------------------------------------------------------------===//
2633// DSE Pass
2634//===----------------------------------------------------------------------===//
2635PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2636   AliasAnalysis &AA = AM.getResult<AAManager>(F);
2637   const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2638   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
2639 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2640   PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
2641 LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
2642
2643 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2644
2645#ifdef LLVM_ENABLE_STATS
2646   if (AreStatisticsEnabled())
2647 for (auto &I : instructions(F))
2648 NumRemainingStores += isa<StoreInst>(&I);
2649#endif
2650
2651 if (!Changed)
2652 return PreservedAnalyses::all();
2653
2654   PreservedAnalyses PA;
2655   PA.preserveSet<CFGAnalyses>();
2656   PA.preserve<MemorySSAAnalysis>();
2657 PA.preserve<LoopAnalysis>();
2658 return PA;
2659}
2660
2661namespace {
2662
2663/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
2664class DSELegacyPass : public FunctionPass {
2665public:
2666 static char ID; // Pass identification, replacement for typeid
2667
2668 DSELegacyPass() : FunctionPass(ID) {
2669     initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
2670 }
2671
2672 bool runOnFunction(Function &F) override {
2673 if (skipFunction(F))
2674 return false;
2675
2676 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2677 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2678 const TargetLibraryInfo &TLI =
2679 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2680 MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
2681 PostDominatorTree &PDT =
2682 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
2683 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2684
2685 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2686
2687#ifdef LLVM_ENABLE_STATS
2688     if (AreStatisticsEnabled())
2689 for (auto &I : instructions(F))
2690 NumRemainingStores += isa<StoreInst>(&I);
2691#endif
2692
2693 return Changed;
2694 }
2695
2696 void getAnalysisUsage(AnalysisUsage &AU) const override {
2697 AU.setPreservesCFG();
2710 }
2711};
2712
2713} // end anonymous namespace
2714
2715char DSELegacyPass::ID = 0;
2716
2717INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
2718 false)
2728INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
2729 false)
2730
2731namespace llvm {
2732FunctionPass *createDeadStoreEliminationPass() {
2733 return new DSELegacyPass();
2734}
2735} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Lower Kernel Arguments
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefAnalysis InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ABI
Definition: Compiler.h:213
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static cl::opt< bool > EnableInitializesImprovement("enable-dse-initializes-attr-improvement", cl::init(true), cl::Hidden, cl::desc("Enable the initializes attr improvement in DSE"))
static void shortenAssignment(Instruction *Inst, Value *OriginalDest, uint64_t OldOffsetInBits, uint64_t OldSizeInBits, uint64_t NewSizeInBits, bool IsOverwriteEnd)
static bool isShortenableAtTheEnd(Instruction *I)
Returns true if the end of this instruction can be safely shortened in length.
static cl::opt< bool > EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE"))
static bool tryToShortenBegin(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
std::map< int64_t, int64_t > OverlapIntervalsTy
static bool isShortenableAtTheBeginning(Instruction *I)
Returns true if the beginning of this instruction can be safely shortened in length.
static cl::opt< unsigned > MemorySSADefsPerBlockLimit("dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, cl::desc("The number of MemoryDefs we consider as candidates to eliminated " "other stores per basic block (default = 5000)"))
static Constant * tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, int64_t KillingOffset, int64_t DeadOffset, const DataLayout &DL, BatchAAResults &AA, DominatorTree *DT)
static bool memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, BatchAAResults &AA, const DataLayout &DL, DominatorTree *DT)
Returns true if the memory which is accessed by the second instruction is not modified between the fi...
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, const Instruction *DeadI, BatchAAResults &AA)
Check if two instruction are masked stores that completely overwrite one another.
static cl::opt< unsigned > MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5), cl::Hidden, cl::desc("The cost of a step in a different basic " "block than the killing MemoryDef" "(default = 5)"))
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd)
static cl::opt< unsigned > MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 150)"))
static cl::opt< unsigned > MemorySSASameBBStepCost("dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden, cl::desc("The cost of a step in the same basic block as the killing MemoryDef" "(default = 1)"))
static cl::opt< bool > EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden, cl::desc("Enable partial-overwrite tracking in DSE"))
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, const MemoryLocation &DeadLoc, int64_t KillingOff, int64_t DeadOff, Instruction *DeadI, InstOverlapIntervalsTy &IOL)
Return 'OW_Complete' if a store to the 'KillingLoc' location completely overwrites a store to the 'De...
static cl::opt< unsigned > MemorySSAPartialStoreLimit("dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, cl::desc("The maximum number candidates that only partially overwrite the " "killing MemoryDef to consider" " (default = 5)"))
static std::optional< TypeSize > getPointerSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, const Function *F)
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo, uint64_t PtrOffset)
Update the attributes given that a memory access is updated (the dereferenced pointer could be moved ...
static cl::opt< unsigned > MemorySSAUpwardsStepLimit("dse-memoryssa-walklimit", cl::init(90), cl::Hidden, cl::desc("The maximum number of steps while walking upwards to find " "MemoryDefs that may be killed (default = 90)"))
static cl::opt< bool > OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden, cl::desc("Allow DSE to optimize memory accesses."))
static cl::opt< unsigned > MemorySSAPathCheckLimit("dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, cl::desc("The maximum number of blocks to check when trying to prove that " "all paths to an exit go through a killing block (default = 50)"))
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:194
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static bool runOnFunction(Function &F, bool PostInlining)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static void deleteDeadInstruction(Instruction *I)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Machine Common Subexpression Elimination
Definition: MachineCSE.cpp:169
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
A private abstract base class describing the concept of an individual alias analysis implementation.
Class for arbitrary precision integers.
Definition: APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:1012
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1562
The possible results of an alias query.
Definition: AliasAnalysis.h:78
@ NoAlias
The two locations do not alias at all.
Definition: AliasAnalysis.h:96
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
constexpr int32_t getOffset() const
constexpr bool hasOffset() const
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:270
This class represents an incoming formal argument to a Function.
Definition: Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
An immutable pass that tracks lazily created AssumptionCache objects.
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Definition: AttributeMask.h:44
static LLVM_ABI Attribute getWithAllocKind(LLVMContext &Context, AllocFnKind Kind)
Definition: Attributes.cpp:303
LLVM_ABI ArrayRef< ConstantRange > getValueAsConstantRangeList() const
Return the attribute's value as a ConstantRange array.
Definition: Attributes.cpp:420
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:400
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:223
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:73
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Definition: InstrTypes.h:1116
LLVM_ABI bool onlyAccessesInaccessibleMemory() const
Determine if the function may only access memory that is inaccessible from the IR.
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Get the attribute of a given kind from a given arg.
Definition: InstrTypes.h:1648
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
Definition: InstrTypes.h:1709
LLVM_ABI bool onlyAccessesInaccessibleMemOrArgMem() const
Determine if the function may only access memory that is either inaccessible from the IR or pointed t...
bool doesNotThrow() const
Determine if the call cannot unwind.
Definition: InstrTypes.h:1955
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1292
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
LLVM_ABI Value * getArgOperandWithAttribute(Attribute::AttrKind Kind) const
If one of the arguments has the specified attribute, returns its operand value.
unsigned arg_size() const
Definition: InstrTypes.h:1290
This class represents a function call, abstracting a target machine's calling convention.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:23
This class represents a list of constant ranges.
bool empty() const
Return true if this list contains no members.
LLVM_ABI ConstantRangeList intersectWith(const ConstantRangeList &CRL) const
Return the range list that results from the intersection of this ConstantRangeList with another Const...
const APInt & getLower() const
Return the lower value for this range.
const APInt & getUpper() const
Return the upper value for this range.
This is an important base class in LLVM.
Definition: Constant.h:43
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Assignment ID.
static DIAssignID * getDistinct(LLVMContext &Context)
static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
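A self-contained sketch of scheduling this pass through the new pass manager; the wrapper function runDSEOnFunction is an illustrative name, not part of LLVM:

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
using namespace llvm;

static void runDSEOnFunction(Function &F) {
  PassBuilder PB;
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  FunctionPassManager FPM;
  FPM.addPass(DSEPass()); // the MemorySSA-backed DSE implemented in this file
  FPM.run(F, FAM);
}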
A parsed version of the target data layout string and methods for querying it.
Definition: DataLayout.h:63
Record of a variable value-assignment, aka a non-instruction representation of the dbg....
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:88
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
bool erase(const KeyT &Val)
Definition: DenseMap.h:319
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
DomTreeNodeBase * getIDom() const
NodeT * getBlock() const
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:284
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
iterator_range< root_iterator > roots()
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:322
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:334
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:135
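A small sketch, on two hypothetical blocks A and B, of the dominance queries listed above:

#include "llvm/IR/Dominators.h"
using namespace llvm;

static void dominanceQueries(const DominatorTree &DT, BasicBlock *A,
                             BasicBlock *B) {
  bool Dom = DT.dominates(A, B);                        // also true when A == B
  BasicBlock *NCD = DT.findNearestCommonDominator(A, B);
  bool Proper = DT.properlyDominates(DT.getNode(A), DT.getNode(B)); // requires A != B
  (void)Dom; (void)NCD; (void)Proper;
}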
Context-sensitive CaptureAnalysis provider, which computes and caches the earliest common dominator c...
void removeInstruction(Instruction *I)
CaptureComponents getCapturesBefore(const Value *Object, const Instruction *I, bool OrAt) override
Return how Object may be captured before instruction I, considering only provenance captures.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:170
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314
const BasicBlock & getEntryBlock() const
Definition: Function.h:807
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Definition: Instructions.h:997
Legacy wrapper pass to provide the GlobalsAAResult object.
bool isEquality() const
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
LLVM_ABI bool mayThrow(bool IncludePhaseOneUnwind=false) const LLVM_READONLY
Return true if this instruction may throw an exception.
LLVM_ABI bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI bool isIdenticalToWhenDefined(const Instruction *I, bool IntersectAttrs=false) const LLVM_READONLY
This is like isIdenticalTo, except that it ignores the SubclassOptionalData flags,...
bool isTerminator() const
Definition: Instruction.h:315
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
LLVM_ABI bool willReturn() const LLVM_READONLY
Return true if the instruction will return (unwinding is considered as a form of returning control fl...
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1789
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:510
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:86
const_iterator begin() const
Definition: IntervalMap.h:1147
bool empty() const
empty - Return true when no intervals are mapped.
Definition: IntervalMap.h:1102
const_iterator end() const
Definition: IntervalMap.h:1159
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:49
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
static LocationSize precise(uint64_t Value)
bool isScalable() const
TypeSize getValue() const
bool isPrecise() const
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:570
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:597
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
iterator end()
Definition: MapVector.h:67
iterator find(const KeyT &Key)
Definition: MapVector.h:141
Value * getLength() const
Value * getValue() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
BasicBlock * getBlock() const
Definition: MemorySSA.h:162
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:371
void setOptimized(MemoryAccess *MA)
Definition: MemorySSA.h:392
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDependenceResults instance.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
MemoryLocation getWithNewPtr(const Value *NewPtr) const
const Value * Ptr
The address of the start of the location.
static LLVM_ABI MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
static LLVM_ABI std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
static LLVM_ABI MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
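A brief sketch, assuming a StoreInst SI and a memset-like MemIntrinsic MI are already in hand, of building MemoryLocations and reading their (possibly imprecise) sizes:

#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

static void buildLocations(StoreInst *SI, MemIntrinsic *MI) {
  MemoryLocation StoreLoc = MemoryLocation::get(SI);        // precise store location
  MemoryLocation DestLoc = MemoryLocation::getForDest(MI);  // memset/memcpy destination
  if (StoreLoc.Size.isPrecise() && !StoreLoc.Size.isScalable()) {
    uint64_t Bytes = StoreLoc.Size.getValue().getFixedValue(); // size in address units
    (void)Bytes;
  }
  (void)DestLoc;
}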
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:936
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1053
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:993
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:702
LLVM_ABI MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1603
LLVM_ABI bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
Definition: MemorySSA.cpp:2173
LLVM_ABI MemorySSAWalker * getWalker()
Definition: MemorySSA.cpp:1590
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:720
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:740
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:260
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
Definition: MemorySSA.h:257
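A hedged sketch of the first step of a DSE-style walk using these MemorySSA entry points; clobberOf is a hypothetical helper name, not an API of this file:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
using namespace llvm;

static MemoryAccess *clobberOf(MemorySSA &MSSA, Instruction *Store,
                               BatchAAResults &BatchAA) {
  MemoryUseOrDef *DefAccess = MSSA.getMemoryAccess(Store);
  if (!DefAccess)
    return nullptr; // Store does not touch memory as far as MemorySSA knows
  // Ask the walker for the nearest dominating access that clobbers Store.
  MemoryAccess *Clobber =
      MSSA.getWalker()->getClobberingMemoryAccess(Store, BatchAA);
  if (MSSA.isLiveOnEntryDef(Clobber))
    return nullptr; // nothing above this point writes the location
  return Clobber;
}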
PHITransAddr - An address value which tracks and handles phi translation.
Definition: PHITransAddr.h:36
LLVM_ABI Value * translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
translateValue - PHI translate the current address up the CFG from CurBB to Pred, updating our state ...
LLVM_ABI bool isPotentiallyPHITranslatable() const
isPotentiallyPHITranslatable - If this needs PHI translation, return true if we have some hope of doi...
bool needsPHITranslationFromBlock(BasicBlock *BB) const
needsPHITranslationFromBlock - Return true if moving from the specified BasicBlock to its predecessor...
Definition: PHITransAddr.h:63
Value * getAddr() const
Definition: PHITransAddr.h:59
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1885
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
LLVM_ABI bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition: Analysis.h:132
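A minimal sketch, mirroring the usual shape for a MemorySSA-updating function pass rather than quoting this pass's code, of assembling the preserved set:

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

static PreservedAnalyses preservedAfterRun(bool MadeChange) {
  if (!MadeChange)
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();     // the CFG itself was not touched
  PA.preserve<MemorySSAAnalysis>();  // MemorySSA was kept up to date
  PA.preserve<LoopAnalysis>();
  return PA;
}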
A vector that has set insertion semantics.
Definition: SetVector.h:59
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:104
void insert_range(Range &&R)
Definition: SetVector.h:193
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
size_type size() const
Definition: SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
iterator begin() const
Definition: SmallPtrSet.h:494
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:476
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
An instruction for storing to memory.
Definition: Instructions.h:296
Value * getValueOperand()
Definition: Instructions.h:383
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:151
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:267
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
op_range operands()
Definition: User.h:292
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:701
bool use_empty() const
Definition: Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1098
iterator_range< use_iterator > uses()
Definition: Value.h:380
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:172
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:862
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:962
CmpClass_match< LHS, RHS, ICmpInst, true > m_c_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Matches an ICmp with a predicate over LHS and RHS in either order.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
Definition: PatternMatch.h:245
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
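A hedged sketch of combining these matchers; isBranchOnLoadEqZero is an illustrative helper, not an API from this file:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;

static bool isBranchOnLoadEqZero(Value *V, BasicBlock *&TrueBB,
                                 BasicBlock *&FalseBB) {
  // Match: br (icmp eq/ne (load ...), 0), TrueBB, FalseBB  (operands in either order).
  CmpPredicate Pred;
  return match(V, m_Br(m_c_ICmp(Pred, m_Load(m_Value()), m_Zero()),
                       TrueBB, FalseBB)) &&
         ICmpInst::isEquality(Pred);
}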
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition: DebugInfo.h:201
LLVM_ABI bool calculateFragmentIntersect(const DataLayout &DL, const Value *Dest, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const DbgVariableRecord *DVRAssign, std::optional< DIExpression::FragmentInfo > &Result)
Calculate the fragment of the variable in DAI covered from (Dest + SliceOffsetInBits) to (Dest + S...
Definition: DebugInfo.cpp:1930
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI FunctionPass * createDeadStoreEliminationPass()
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
LLVM_ABI void initializeDSELegacyPassPass(PassRegistry &)
LLVM_ABI Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
bool isStrongerThanMonotonic(AtomicOrdering AO)
@ Uninitialized
Definition: Threading.h:60
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
AllocFnKind
Definition: Attributes.h:51
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1723
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
iterator_range< po_iterator< T > > post_order(const T &G)
LLVM_ABI bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:402
LLVM_ABI bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:49
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:1172
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139
LLVM_ABI bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
LLVM_ABI Value * emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, const TargetLibraryInfo &TLI, unsigned AddrSpace)
Emit a call to the calloc function.
uint64_t offsetToAlignment(uint64_t Value, Align Alignment)
Returns the offset to the next integer (mod 2**64) that is greater than or equal to Value and is a mu...
Definition: Alignment.h:197
LLVM_ABI bool salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
LLVM_ABI bool isIdentifiedFunctionLocal(const Value *V)
Return true if V is unambiguously identified at the function level.
LLVM_ABI Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
auto predecessors(const MachineBasicBlock *BB)
bool capturesAnything(CaptureComponents CC)
Definition: ModRef.h:319
LLVM_ABI bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI)
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
bool capturesNothing(CaptureComponents CC)
Definition: ModRef.h:315
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
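A short sketch, under the assumption that Ptr is a store's pointer operand, combining getUnderlyingObject, isIdentifiedObject, and getObjectSize; knownLocalObjectSize is a hypothetical helper:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
using namespace llvm;

static bool knownLocalObjectSize(const Value *Ptr, const DataLayout &DL,
                                 const TargetLibraryInfo *TLI,
                                 uint64_t &Size) {
  const Value *Obj = getUnderlyingObject(Ptr); // strip GEPs and casts
  // Only identified objects (allocas, noalias calls, ...) have a meaningful
  // whole-object size for this kind of reasoning.
  return isIdentifiedObject(Obj) && getObjectSize(Obj, Size, DL, TLI);
}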
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if AO is stronger than Other as defined by the AtomicOrdering lattice,...
bool isRefSet(const ModRefInfo MRI)
Definition: ModRef.h:52
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Various options to control the behavior of getObjectSize.
bool NullIsUnknownSize
If this is true, null pointers in address space 0 will be treated as though they can't be evaluated.
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:249