LLVM 22.0.0git
SROA.cpp
1//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the well-known scalar replacement of aggregates
10/// (SROA) transformation. It tries to identify promotable elements of an
11/// aggregate alloca, and promote them to registers. It will also try to
12/// convert uses of an element (or set of elements) of an alloca into a vector
13/// or bitfield-style integer scalar if appropriate.
14///
15/// It works to do this with minimal slicing of the alloca so that regions
16/// which are merely transferred in and out of external memory remain unchanged
17/// and are not decomposed to scalar code.
18///
19/// Because this also performs alloca promotion, it can be thought of as
20/// serving the purpose of SSA formation. The algorithm iterates on the
21/// function until all opportunities for promotion have been realized.
22///
23//===----------------------------------------------------------------------===//
24
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/MapVector.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/SetVector.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/ADT/StringRef.h"
38#include "llvm/ADT/Twine.h"
39#include "llvm/ADT/iterator.h"
44#include "llvm/Analysis/Loads.h"
47#include "llvm/Config/llvm-config.h"
48#include "llvm/IR/BasicBlock.h"
49#include "llvm/IR/Constant.h"
51#include "llvm/IR/Constants.h"
52#include "llvm/IR/DIBuilder.h"
53#include "llvm/IR/DataLayout.h"
54#include "llvm/IR/DebugInfo.h"
57#include "llvm/IR/Dominators.h"
58#include "llvm/IR/Function.h"
59#include "llvm/IR/GlobalAlias.h"
60#include "llvm/IR/IRBuilder.h"
61#include "llvm/IR/InstVisitor.h"
62#include "llvm/IR/Instruction.h"
65#include "llvm/IR/LLVMContext.h"
66#include "llvm/IR/Metadata.h"
67#include "llvm/IR/Module.h"
68#include "llvm/IR/Operator.h"
69#include "llvm/IR/PassManager.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/Use.h"
72#include "llvm/IR/User.h"
73#include "llvm/IR/Value.h"
74#include "llvm/IR/ValueHandle.h"
76#include "llvm/Pass.h"
80#include "llvm/Support/Debug.h"
88#include <algorithm>
89#include <cassert>
90#include <cstddef>
91#include <cstdint>
92#include <cstring>
93#include <iterator>
94#include <string>
95#include <tuple>
96#include <utility>
97#include <variant>
98#include <vector>
99
100using namespace llvm;
101
102#define DEBUG_TYPE "sroa"
103
104STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
105STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
106STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
107STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
108STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
109STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
110STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
111STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
112STATISTIC(NumLoadsPredicated,
113 "Number of loads rewritten into predicated loads to allow promotion");
114STATISTIC(
115 NumStoresPredicated,
116 "Number of stores rewritten into predicated stores to allow promotion");
117STATISTIC(NumDeleted, "Number of instructions deleted");
118STATISTIC(NumVectorized, "Number of vectorized aggregates");
119
120/// Disable running mem2reg during SROA in order to test or debug SROA.
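/// (Being a cl::opt, the flag can be passed straight to the opt tool, e.g.
/// `opt -passes=sroa -sroa-skip-mem2reg ...`, to inspect the rewritten but
/// not-yet-promoted allocas.)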
121static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false),
122 cl::Hidden);
123namespace {
124
125class AllocaSliceRewriter;
126class AllocaSlices;
127class Partition;
128
129class SelectHandSpeculativity {
130 unsigned char Storage = 0; // None are speculatable by default.
131 using TrueVal = Bitfield::Element<bool, 0, 1>; // Low 0'th bit.
132 using FalseVal = Bitfield::Element<bool, 1, 1>; // Low 1'th bit.
133public:
134 SelectHandSpeculativity() = default;
135 SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
136 bool isSpeculatable(bool isTrueVal) const;
137 bool areAllSpeculatable() const;
138 bool areAnySpeculatable() const;
139 bool areNoneSpeculatable() const;
140 // For interop as int half of PointerIntPair.
141 explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
142 explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
143};
144static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
145
146using PossiblySpeculatableLoad =
147 PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
148using UnspeculatableStore = StoreInst *;
149using RewriteableMemOp =
150 std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
151using RewriteableMemOps = SmallVector<RewriteableMemOp, 2>;
152
153/// An optimization pass providing Scalar Replacement of Aggregates.
154///
155/// This pass takes allocations which can be completely analyzed (that is, they
156/// don't escape) and tries to turn them into scalar SSA values. There are
157/// a few steps to this process.
158///
159/// 1) It takes allocations of aggregates and analyzes the ways in which they
160/// are used to try to split them into smaller allocations, ideally of
161/// a single scalar data type. It will split up memcpy and memset accesses
162/// as necessary and try to isolate individual scalar accesses.
163/// 2) It will transform accesses into forms which are suitable for SSA value
164/// promotion. This can be replacing a memset with a scalar store of an
165/// integer value, or it can involve speculating operations on a PHI or
166/// select to be a PHI or select of the results.
167/// 3) Finally, this will try to detect a pattern of accesses which map cleanly
168/// onto insert and extract operations on a vector value, and convert them to
169/// this form. By doing so, it will enable promotion of vector aggregates to
170/// SSA vector values.
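///
/// As a minimal illustration (the exact rewrite depends on the uses and on
/// the target data layout), a fully analyzable aggregate alloca such as:
///   %pair = alloca { i32, i32 }
///   %fst = getelementptr inbounds { i32, i32 }, ptr %pair, i32 0, i32 0
///   store i32 1, ptr %fst
///   %v = load i32, ptr %fst
/// may be split into per-field allocas and promoted, so that %v simply
/// becomes the SSA value 1 and the alloca disappears.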
171class SROA {
172 LLVMContext *const C;
173 DomTreeUpdater *const DTU;
174 AssumptionCache *const AC;
175 const bool PreserveCFG;
176
177 /// Worklist of alloca instructions to simplify.
178 ///
179 /// Each alloca in the function is added to this. Each new alloca formed gets
180 /// added to it as well to recursively simplify unless that alloca can be
181 /// directly promoted. Finally, each time we rewrite a use of an alloca other
182 /// the one being actively rewritten, we add it back onto the list if not
183 /// already present to ensure it is re-visited.
185
186 /// A collection of instructions to delete.
187 /// We try to batch deletions to simplify code and make things a bit more
188 /// efficient. We also make sure there are no dangling pointers.
189 SmallVector<WeakVH, 8> DeadInsts;
190
191 /// Post-promotion worklist.
192 ///
193 /// Sometimes we discover an alloca which has a high probability of becoming
194 /// viable for SROA after a round of promotion takes place. In those cases,
195 /// the alloca is enqueued here for re-processing.
196 ///
197 /// Note that we have to be very careful to clear allocas out of this list in
198 /// the event they are deleted.
199 SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
200
201 /// A collection of alloca instructions we can directly promote.
204 PromotableAllocas;
205
206 /// A worklist of PHIs to speculate prior to promoting allocas.
207 ///
208 /// All of these PHIs have been checked for the safety of speculation and by
209 /// being speculated will allow promoting allocas currently in the promotable
210 /// queue.
211 SmallSetVector<PHINode *, 8> SpeculatablePHIs;
212
213 /// A worklist of select instructions to rewrite prior to promoting
214 /// allocas.
216
217 /// Select instructions that use an alloca and are subsequently loaded can be
218 /// rewritten to load both input pointers and then select between the result,
219 /// allowing the load of the alloca to be promoted.
220 /// From this:
221 /// %P2 = select i1 %cond, ptr %Alloca, ptr %Other
222 /// %V = load <type>, ptr %P2
223 /// to:
224 /// %V1 = load <type>, ptr %Alloca -> will be mem2reg'd
225 /// %V2 = load <type>, ptr %Other
226 /// %V = select i1 %cond, <type> %V1, <type> %V2
227 ///
228 /// We can do this to a select if its only uses are loads
229 /// and if either the operands to the select can be loaded unconditionally
230 /// or we are allowed to perform CFG modifications.
231 /// If an intervening bitcast with a single use of the load is found,
232 /// still allow the promotion.
233 static std::optional<RewriteableMemOps>
234 isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);
235
236public:
237 SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
238 SROAOptions PreserveCFG_)
239 : C(C), DTU(DTU), AC(AC),
240 PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}
241
242 /// Main run method used by both the SROAPass and by the legacy pass.
243 std::pair<bool /*Changed*/, bool /*CFGChanged*/> runSROA(Function &F);
244
245private:
246 friend class AllocaSliceRewriter;
247
248 bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
249 AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
250 bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
251 bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
252 std::pair<bool /*Changed*/, bool /*CFGChanged*/> runOnAlloca(AllocaInst &AI);
253 void clobberUse(Use &U);
254 bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
255 bool promoteAllocas();
256};
257
258} // end anonymous namespace
259
260/// Calculate the fragment of a variable to use when slicing a store
261/// based on the slice dimensions, existing fragment, and base storage
262/// fragment.
263/// Results:
264/// UseFrag - Use Target as the new fragment.
265/// UseNoFrag - The new slice already covers the whole variable.
266/// Skip - The new alloca slice doesn't include this variable.
267/// FIXME: Can we use calculateFragmentIntersect instead?
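/// Worked example (illustrative): for a 64-bit variable with no base storage
/// fragment and no fragment on the existing expression, a slice covering bits
/// [32, 64) produces Target = (offset 32, size 32) and returns UseFrag, while
/// a slice covering all 64 bits matches the whole variable and returns
/// UseNoFrag.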
268namespace {
269enum FragCalcResult { UseFrag, UseNoFrag, Skip };
270}
271static FragCalcResult
272calculateFragment(DILocalVariable *Variable,
273 uint64_t NewStorageSliceOffsetInBits,
274 uint64_t NewStorageSliceSizeInBits,
275 std::optional<DIExpression::FragmentInfo> StorageFragment,
276 std::optional<DIExpression::FragmentInfo> CurrentFragment,
277 DIExpression::FragmentInfo &Target) {
278 // If the base storage describes part of the variable, apply the offset and
279 // the size constraint.
280 if (StorageFragment) {
281 Target.SizeInBits =
282 std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
283 Target.OffsetInBits =
284 NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
285 } else {
286 Target.SizeInBits = NewStorageSliceSizeInBits;
287 Target.OffsetInBits = NewStorageSliceOffsetInBits;
288 }
289
290 // If this slice extracts the entirety of an independent variable from a
291 // larger alloca, do not produce a fragment expression, as the variable is
292 // not fragmented.
293 if (!CurrentFragment) {
294 if (auto Size = Variable->getSizeInBits()) {
295 // Treat the current fragment as covering the whole variable.
296 CurrentFragment = DIExpression::FragmentInfo(*Size, 0);
297 if (Target == CurrentFragment)
298 return UseNoFrag;
299 }
300 }
301
302 // No additional work to do if there isn't a fragment already, or there is
303 // but it already exactly describes the new assignment.
304 if (!CurrentFragment || *CurrentFragment == Target)
305 return UseFrag;
306
307 // Reject the target fragment if it doesn't fit wholly within the current
308 // fragment. TODO: We could instead chop up the target to fit in the case of
309 // a partial overlap.
310 if (Target.startInBits() < CurrentFragment->startInBits() ||
311 Target.endInBits() > CurrentFragment->endInBits())
312 return Skip;
313
314 // Target fits within the current fragment, return it.
315 return UseFrag;
316}
317
318static DebugVariable getAggregateVariable(DbgVariableRecord *DVR) {
319 return DebugVariable(DVR->getVariable(), std::nullopt,
320 DVR->getDebugLoc().getInlinedAt());
321}
322
323/// Find linked dbg.assign and generate a new one with the correct
324/// FragmentInfo. Link Inst to the new dbg.assign. If Value is nullptr the
325/// value component is copied from the old dbg.assign to the new.
326/// \param OldAlloca Alloca for the variable before splitting.
327/// \param IsSplit True if the store (not necessarily alloca)
328/// is being split.
329/// \param OldAllocaOffsetInBits Offset of the slice taken from OldAlloca.
330/// \param SliceSizeInBits New number of bits being written to.
331/// \param OldInst Instruction that is being split.
332/// \param Inst New instruction performing this part of the
333/// split store.
334/// \param Dest Store destination.
335/// \param Value Stored value.
336/// \param DL Datalayout.
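///
/// For example (illustrative only): when a 64-bit store with a linked
/// #dbg_assign for a 64-bit variable is split into two 32-bit stores, each
/// new store receives its own distinct DIAssignID and a #dbg_assign whose
/// expression carries a DW_OP_LLVM_fragment covering bits [0, 32) or
/// [32, 64) respectively.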
337static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
338 uint64_t OldAllocaOffsetInBits,
339 uint64_t SliceSizeInBits, Instruction *OldInst,
340 Instruction *Inst, Value *Dest, Value *Value,
341 const DataLayout &DL) {
342 auto DVRAssignMarkerRange = at::getDVRAssignmentMarkers(OldInst);
343 // Nothing to do if OldInst has no linked dbg.assign intrinsics.
344 if (DVRAssignMarkerRange.empty())
345 return;
346
347 LLVM_DEBUG(dbgs() << " migrateDebugInfo\n");
348 LLVM_DEBUG(dbgs() << " OldAlloca: " << *OldAlloca << "\n");
349 LLVM_DEBUG(dbgs() << " IsSplit: " << IsSplit << "\n");
350 LLVM_DEBUG(dbgs() << " OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
351 << "\n");
352 LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n");
353 LLVM_DEBUG(dbgs() << " OldInst: " << *OldInst << "\n");
354 LLVM_DEBUG(dbgs() << " Inst: " << *Inst << "\n");
355 LLVM_DEBUG(dbgs() << " Dest: " << *Dest << "\n");
356 if (Value)
357 LLVM_DEBUG(dbgs() << " Value: " << *Value << "\n");
358
359 /// Map of aggregate variables to their fragment associated with OldAlloca.
360 SmallDenseMap<DebugVariable, std::optional<DIExpression::FragmentInfo>>
361 BaseFragments;
362 for (auto *DVR : at::getDVRAssignmentMarkers(OldAlloca))
363 BaseFragments[getAggregateVariable(DVR)] =
364 DVR->getExpression()->getFragmentInfo();
365
366 // The new inst needs a DIAssignID unique metadata tag (if OldInst has
367 // one). It shouldn't already have one: assert this assumption.
368 assert(!Inst->getMetadata(LLVMContext::MD_DIAssignID));
369 DIAssignID *NewID = nullptr;
370 auto &Ctx = Inst->getContext();
371 DIBuilder DIB(*OldInst->getModule(), /*AllowUnresolved*/ false);
372 assert(OldAlloca->isStaticAlloca());
373
374 auto MigrateDbgAssign = [&](DbgVariableRecord *DbgAssign) {
375 LLVM_DEBUG(dbgs() << " existing dbg.assign is: " << *DbgAssign
376 << "\n");
377 auto *Expr = DbgAssign->getExpression();
378 bool SetKillLocation = false;
379
380 if (IsSplit) {
381 std::optional<DIExpression::FragmentInfo> BaseFragment;
382 {
383 auto R = BaseFragments.find(getAggregateVariable(DbgAssign));
384 if (R == BaseFragments.end())
385 return;
386 BaseFragment = R->second;
387 }
388 std::optional<DIExpression::FragmentInfo> CurrentFragment =
389 Expr->getFragmentInfo();
390 DIExpression::FragmentInfo NewFragment;
391 FragCalcResult Result = calculateFragment(
392 DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
393 BaseFragment, CurrentFragment, NewFragment);
394
395 if (Result == Skip)
396 return;
397 if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
398 if (CurrentFragment) {
399 // Rewrite NewFragment to be relative to the existing one (this is
400 // what createFragmentExpression wants). CalculateFragment has
401 // already resolved the size for us. FIXME: Should it return the
402 // relative fragment too?
403 NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;
404 }
405 // Add the new fragment info to the existing expression if possible.
406 if (auto E = DIExpression::createFragmentExpression(
407 Expr, NewFragment.OffsetInBits, NewFragment.SizeInBits)) {
408 Expr = *E;
409 } else {
410 // Otherwise, add the new fragment info to an empty expression and
411 // discard the value component of this dbg.assign as the value cannot
412 // be computed with the new fragment.
413 Expr = *DIExpression::createFragmentExpression(
414 DIExpression::get(Expr->getContext(), {}),
415 NewFragment.OffsetInBits, NewFragment.SizeInBits);
416 SetKillLocation = true;
417 }
418 }
419 }
420
421 // If we haven't created a DIAssignID ID do that now and attach it to Inst.
422 if (!NewID) {
423 NewID = DIAssignID::getDistinct(Ctx);
424 Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
425 }
426
427 ::Value *NewValue = Value ? Value : DbgAssign->getValue();
428 DbgVariableRecord *NewAssign = cast<DbgVariableRecord>(cast<DbgRecord *>(
429 DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
430 Dest, DIExpression::get(Expr->getContext(), {}),
431 DbgAssign->getDebugLoc())));
432
433 // If we've updated the value but the original dbg.assign has an arglist
434 // then kill it now - we can't use the requested new value.
435 // We can't replace the DIArgList with the new value as it'd leave
436 // the DIExpression in an invalid state (DW_OP_LLVM_arg operands without
437 // an arglist). And we can't keep the DIArgList in case the linked store
438 // is being split - in which case the DIArgList + expression may no longer
439 // be computing the correct value.
440 // This should be a very rare situation as it requires the value being
441 // stored to differ from the dbg.assign (i.e., the value has been
442 // represented differently in the debug intrinsic for some reason).
443 SetKillLocation |=
444 Value && (DbgAssign->hasArgList() ||
445 !DbgAssign->getExpression()->isSingleLocationExpression());
446 if (SetKillLocation)
447 NewAssign->setKillLocation();
448
449 // We could use more precision here at the cost of some additional (code)
450 // complexity - if the original dbg.assign was adjacent to its store, we
451 // could position this new dbg.assign adjacent to its store rather than the
452 // old dbg.assign. That would result in interleaved dbg.assigns rather than
453 // what we get now:
454 // split store !1
455 // split store !2
456 // dbg.assign !1
457 // dbg.assign !2
458 // This (current behaviour) results in debug assignments being
459 // noted as slightly offset (in code) from the store. In practice this
460 // should have little effect on the debugging experience due to the fact
461 // that all the split stores should get the same line number.
462 NewAssign->moveBefore(DbgAssign->getIterator());
463
464 NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
465 LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");
466 };
467
468 for_each(DVRAssignMarkerRange, MigrateDbgAssign);
469}
470
471namespace {
472
473/// A custom IRBuilder inserter which prefixes all names, but only in
474/// Assert builds.
475class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
476 std::string Prefix;
477
478 Twine getNameWithPrefix(const Twine &Name) const {
479 return Name.isTriviallyEmpty() ? Name : Prefix + Name;
480 }
481
482public:
483 void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
484
485 void InsertHelper(Instruction *I, const Twine &Name,
486 BasicBlock::iterator InsertPt) const override {
487 IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name),
488 InsertPt);
489 }
490};
491
492/// Provide a type for IRBuilder that drops names in release builds.
493using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
494
495/// A used slice of an alloca.
496///
497/// This structure represents a slice of an alloca used by some instruction. It
498/// stores both the begin and end offsets of this use, a pointer to the use
499/// itself, and a flag indicating whether we can classify the use as splittable
500/// or not when forming partitions of the alloca.
501class Slice {
502 /// The beginning offset of the range.
503 uint64_t BeginOffset = 0;
504
505 /// The ending offset, not included in the range.
506 uint64_t EndOffset = 0;
507
508 /// Storage for both the use of this slice and whether it can be
509 /// split.
510 PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
511
512public:
513 Slice() = default;
514
515 Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
516 : BeginOffset(BeginOffset), EndOffset(EndOffset),
517 UseAndIsSplittable(U, IsSplittable) {}
518
519 uint64_t beginOffset() const { return BeginOffset; }
520 uint64_t endOffset() const { return EndOffset; }
521
522 bool isSplittable() const { return UseAndIsSplittable.getInt(); }
523 void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
524
525 Use *getUse() const { return UseAndIsSplittable.getPointer(); }
526
527 bool isDead() const { return getUse() == nullptr; }
528 void kill() { UseAndIsSplittable.setPointer(nullptr); }
529
530 /// Support for ordering ranges.
531 ///
532 /// This provides an ordering over ranges such that start offsets are
533 /// always increasing, and within equal start offsets, the end offsets are
534 /// decreasing. Thus the spanning range comes first in a cluster with the
535 /// same start position.
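  /// For example, among slices beginning at offset 0, an unsplittable slice
  /// orders before any splittable one, and within the same splittability a
  /// [0, 16) slice orders before a [0, 8) slice.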
536 bool operator<(const Slice &RHS) const {
537 if (beginOffset() < RHS.beginOffset())
538 return true;
539 if (beginOffset() > RHS.beginOffset())
540 return false;
541 if (isSplittable() != RHS.isSplittable())
542 return !isSplittable();
543 if (endOffset() > RHS.endOffset())
544 return true;
545 return false;
546 }
547
548 /// Support comparison with a single offset to allow binary searches.
549 friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
550 uint64_t RHSOffset) {
551 return LHS.beginOffset() < RHSOffset;
552 }
553 friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
554 const Slice &RHS) {
555 return LHSOffset < RHS.beginOffset();
556 }
557
558 bool operator==(const Slice &RHS) const {
559 return isSplittable() == RHS.isSplittable() &&
560 beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
561 }
562 bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
563};
564
565/// Representation of the alloca slices.
566///
567/// This class represents the slices of an alloca which are formed by its
568/// various uses. If a pointer escapes, we can't fully build a representation
569/// for the slices used and we reflect that in this structure. The uses are
570/// stored, sorted by increasing beginning offset and with unsplittable slices
571/// starting at a particular offset before splittable slices.
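///
/// For example (illustrative), given
///   %a = alloca [8 x i8]
///   store float 1.0, ptr %a
///   call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 8, i1 false)
/// the float store produces an unsplittable slice [0, 4) and the memset
/// produces a splittable slice [0, 8); with equal begin offsets the
/// unsplittable slice is stored first.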
572class AllocaSlices {
573public:
574 /// Construct the slices of a particular alloca.
575 AllocaSlices(const DataLayout &DL, AllocaInst &AI);
576
577 /// Test whether a pointer to the allocation escapes our analysis.
578 ///
579 /// If this is true, the slices are never fully built and should be
580 /// ignored.
581 bool isEscaped() const { return PointerEscapingInstr; }
582 bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }
583
584 /// Support for iterating over the slices.
585 /// @{
586 using iterator = SmallVectorImpl<Slice>::iterator;
587 using range = iterator_range<iterator>;
588
589 iterator begin() { return Slices.begin(); }
590 iterator end() { return Slices.end(); }
591
592 using const_iterator = SmallVectorImpl<Slice>::const_iterator;
593 using const_range = iterator_range<const_iterator>;
594
595 const_iterator begin() const { return Slices.begin(); }
596 const_iterator end() const { return Slices.end(); }
597 /// @}
598
599 /// Erase a range of slices.
600 void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
601
602 /// Insert new slices for this alloca.
603 ///
604 /// This moves the slices into the alloca's slices collection, and re-sorts
605 /// everything so that the usual ordering properties of the alloca's slices
606 /// hold.
607 void insert(ArrayRef<Slice> NewSlices) {
608 int OldSize = Slices.size();
609 Slices.append(NewSlices.begin(), NewSlices.end());
610 auto SliceI = Slices.begin() + OldSize;
611 std::stable_sort(SliceI, Slices.end());
612 std::inplace_merge(Slices.begin(), SliceI, Slices.end());
613 }
614
615 // Forward declare the iterator and range accessor for walking the
616 // partitions.
617 class partition_iterator;
618 iterator_range<partition_iterator> partitions();
619
620 /// Access the dead users for this alloca.
621 ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
622
623 /// Access Uses that should be dropped if the alloca is promotable.
624 ArrayRef<Use *> getDeadUsesIfPromotable() const {
625 return DeadUseIfPromotable;
626 }
627
628 /// Access the dead operands referring to this alloca.
629 ///
630 /// These are operands which cannot actually be used to refer to the
631 /// alloca as they are outside its range and the user doesn't correct for
632 /// that. These mostly consist of PHI node inputs and the like which we just
633 /// need to replace with undef.
634 ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }
635
636#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
637 void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
638 void printSlice(raw_ostream &OS, const_iterator I,
639 StringRef Indent = " ") const;
640 void printUse(raw_ostream &OS, const_iterator I,
641 StringRef Indent = " ") const;
642 void print(raw_ostream &OS) const;
643 void dump(const_iterator I) const;
644 void dump() const;
645#endif
646
647private:
648 template <typename DerivedT, typename RetT = void> class BuilderBase;
649 class SliceBuilder;
650
651 friend class AllocaSlices::SliceBuilder;
652
653#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
654 /// Handle to alloca instruction to simplify method interfaces.
655 AllocaInst &AI;
656#endif
657
658 /// The instruction responsible for this alloca not having a known set
659 /// of slices.
660 ///
661 /// When an instruction (potentially) escapes the pointer to the alloca, we
662 /// store a pointer to that here and abort trying to form slices of the
663 /// alloca. This will be null if the alloca slices are analyzed successfully.
664 Instruction *PointerEscapingInstr;
665 Instruction *PointerEscapingInstrReadOnly;
666
667 /// The slices of the alloca.
668 ///
669 /// We store a vector of the slices formed by uses of the alloca here. This
670 /// vector is sorted by increasing begin offset, and then the unsplittable
671 /// slices before the splittable ones. See the Slice inner class for more
672 /// details.
673 SmallVector<Slice, 8> Slices;
674
675 /// Instructions which will become dead if we rewrite the alloca.
676 ///
677 /// Note that these are not separated by slice. This is because we expect an
678 /// alloca to be completely rewritten or not rewritten at all. If rewritten,
679 /// all these instructions can simply be removed and replaced with poison as
680 /// they come from outside of the allocated space.
681 SmallVector<Instruction *, 8> DeadUsers;
682
683 /// Uses which will become dead if can promote the alloca.
684 SmallVector<Use *, 8> DeadUseIfPromotable;
685
686 /// Operands which will become dead if we rewrite the alloca.
687 ///
688 /// These are operands that in their particular use can be replaced with
689 /// poison when we rewrite the alloca. These show up in out-of-bounds inputs
690 /// to PHI nodes and the like. They aren't entirely dead (there might be
691 /// a GEP back into the bounds using it elsewhere), and neither is the PHI, but we
692 /// want to swap this particular input for poison to simplify the use lists of
693 /// the alloca.
694 SmallVector<Use *, 8> DeadOperands;
695};
696
697/// A partition of the slices.
698///
699/// An ephemeral representation for a range of slices which can be viewed as
700/// a partition of the alloca. This range represents a span of the alloca's
701/// memory which cannot be split, and provides access to all of the slices
702/// overlapping some part of the partition.
703///
704/// Objects of this type are produced by traversing the alloca's slices, but
705/// are only ephemeral and not persistent.
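///
/// For example (illustrative), an unsplittable slice [0, 4) overlapped by a
/// splittable slice [0, 8) yields a partition [0, 4) containing both slices,
/// followed by a partition [4, 8) whose only contribution is the split tail
/// of the [0, 8) slice.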
706class Partition {
707private:
708 friend class AllocaSlices;
709 friend class AllocaSlices::partition_iterator;
710
711 using iterator = AllocaSlices::iterator;
712
713 /// The beginning and ending offsets of the alloca for this
714 /// partition.
715 uint64_t BeginOffset = 0, EndOffset = 0;
716
717 /// The start and end iterators of this partition.
718 iterator SI, SJ;
719
720 /// A collection of split slice tails overlapping the partition.
721 SmallVector<Slice *, 4> SplitTails;
722
723 /// Raw constructor builds an empty partition starting and ending at
724 /// the given iterator.
725 Partition(iterator SI) : SI(SI), SJ(SI) {}
726
727public:
728 /// The start offset of this partition.
729 ///
730 /// All of the contained slices start at or after this offset.
731 uint64_t beginOffset() const { return BeginOffset; }
732
733 /// The end offset of this partition.
734 ///
735 /// All of the contained slices end at or before this offset.
736 uint64_t endOffset() const { return EndOffset; }
737
738 /// The size of the partition.
739 ///
740 /// Note that this can never be zero.
741 uint64_t size() const {
742 assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
743 return EndOffset - BeginOffset;
744 }
745
746 /// Test whether this partition contains no slices, and merely spans
747 /// a region occupied by split slices.
748 bool empty() const { return SI == SJ; }
749
750 /// \name Iterate slices that start within the partition.
751 /// These may be splittable or unsplittable. They have a begin offset >= the
752 /// partition begin offset.
753 /// @{
754 // FIXME: We should probably define a "concat_iterator" helper and use that
755 // to stitch together pointee_iterators over the split tails and the
756 // contiguous iterators of the partition. That would give a much nicer
757 // interface here. We could then additionally expose filtered iterators for
758 // split, unsplit, and unsplittable slices based on the usage patterns.
759 iterator begin() const { return SI; }
760 iterator end() const { return SJ; }
761 /// @}
762
763 /// Get the sequence of split slice tails.
764 ///
765 /// These tails are of slices which start before this partition but are
766 /// split and overlap into the partition. We accumulate these while forming
767 /// partitions.
768 ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
769};
770
771} // end anonymous namespace
772
773/// An iterator over partitions of the alloca's slices.
774///
775/// This iterator implements the core algorithm for partitioning the alloca's
776/// slices. It is a forward iterator as we don't support backtracking for
777/// efficiency reasons, and re-use a single storage area to maintain the
778/// current set of split slices.
779///
780/// It is templated on the slice iterator type to use so that it can operate
781/// with either const or non-const slice iterators.
782class AllocaSlices::partition_iterator
783 : public iterator_facade_base<partition_iterator, std::forward_iterator_tag,
784 Partition> {
785 friend class AllocaSlices;
786
787 /// Most of the state for walking the partitions is held in a class
788 /// with a nice interface for examining them.
789 Partition P;
790
791 /// We need to keep the end of the slices to know when to stop.
792 AllocaSlices::iterator SE;
793
794 /// We also need to keep track of the maximum split end offset seen.
795 /// FIXME: Do we really?
796 uint64_t MaxSplitSliceEndOffset = 0;
797
798 /// Sets the partition to be empty at given iterator, and sets the
799 /// end iterator.
800 partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
801 : P(SI), SE(SE) {
802 // If not already at the end, advance our state to form the initial
803 // partition.
804 if (SI != SE)
805 advance();
806 }
807
808 /// Advance the iterator to the next partition.
809 ///
810 /// Requires that the iterator not be at the end of the slices.
811 void advance() {
812 assert((P.SI != SE || !P.SplitTails.empty()) &&
813 "Cannot advance past the end of the slices!");
814
815 // Clear out any split uses which have ended.
816 if (!P.SplitTails.empty()) {
817 if (P.EndOffset >= MaxSplitSliceEndOffset) {
818 // If we've finished all splits, this is easy.
819 P.SplitTails.clear();
820 MaxSplitSliceEndOffset = 0;
821 } else {
822 // Remove the uses which have ended in the prior partition. This
823 // cannot change the max split slice end because we just checked that
824 // the prior partition ended prior to that max.
825 llvm::erase_if(P.SplitTails,
826 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
827 assert(llvm::any_of(P.SplitTails,
828 [&](Slice *S) {
829 return S->endOffset() == MaxSplitSliceEndOffset;
830 }) &&
831 "Could not find the current max split slice offset!");
832 assert(llvm::all_of(P.SplitTails,
833 [&](Slice *S) {
834 return S->endOffset() <= MaxSplitSliceEndOffset;
835 }) &&
836 "Max split slice end offset is not actually the max!");
837 }
838 }
839
840 // If P.SI is already at the end, then we've cleared the split tail and
841 // now have an end iterator.
842 if (P.SI == SE) {
843 assert(P.SplitTails.empty() && "Failed to clear the split slices!");
844 return;
845 }
846
847 // If we had a non-empty partition previously, set up the state for
848 // subsequent partitions.
849 if (P.SI != P.SJ) {
850 // Accumulate all the splittable slices which started in the old
851 // partition into the split list.
852 for (Slice &S : P)
853 if (S.isSplittable() && S.endOffset() > P.EndOffset) {
854 P.SplitTails.push_back(&S);
855 MaxSplitSliceEndOffset =
856 std::max(S.endOffset(), MaxSplitSliceEndOffset);
857 }
858
859 // Start from the end of the previous partition.
860 P.SI = P.SJ;
861
862 // If P.SI is now at the end, we at most have a tail of split slices.
863 if (P.SI == SE) {
864 P.BeginOffset = P.EndOffset;
865 P.EndOffset = MaxSplitSliceEndOffset;
866 return;
867 }
868
869 // If we have split slices and the next slice is after a gap and is
870 // not splittable, immediately form an empty partition for the split
871 // slices up until the next slice begins.
872 if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
873 !P.SI->isSplittable()) {
874 P.BeginOffset = P.EndOffset;
875 P.EndOffset = P.SI->beginOffset();
876 return;
877 }
878 }
879
880 // OK, we need to consume new slices. Set the end offset based on the
881 // current slice, and step SJ past it. The beginning offset of the
882 // partition is the beginning offset of the next slice unless we have
883 // pre-existing split slices that are continuing, in which case we begin
884 // at the prior end offset.
885 P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
886 P.EndOffset = P.SI->endOffset();
887 ++P.SJ;
888
889 // There are two strategies to form a partition based on whether the
890 // partition starts with an unsplittable slice or a splittable slice.
891 if (!P.SI->isSplittable()) {
892 // When we're forming an unsplittable region, it must always start at
893 // the first slice and will extend through its end.
894 assert(P.BeginOffset == P.SI->beginOffset());
895
896 // Form a partition including all of the overlapping slices with this
897 // unsplittable slice.
898 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
899 if (!P.SJ->isSplittable())
900 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
901 ++P.SJ;
902 }
903
904 // We have a partition across a set of overlapping unsplittable
905 // slices.
906 return;
907 }
908
909 // If we're starting with a splittable slice, then we need to form
910 // a synthetic partition spanning it and any other overlapping splittable
911 // slices.
912 assert(P.SI->isSplittable() && "Forming a splittable partition!");
913
914 // Collect all of the overlapping splittable slices.
915 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
916 P.SJ->isSplittable()) {
917 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
918 ++P.SJ;
919 }
920
921 // Back up P.EndOffset if we ended the span early when encountering an
922 // unsplittable slice. This synthesizes the early end offset of
923 // a partition spanning only splittable slices.
924 if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
925 assert(!P.SJ->isSplittable());
926 P.EndOffset = P.SJ->beginOffset();
927 }
928 }
929
930public:
931 bool operator==(const partition_iterator &RHS) const {
932 assert(SE == RHS.SE &&
933 "End iterators don't match between compared partition iterators!");
934
935 // The observed positions of partitions are marked by the P.SI iterator and
936 // the emptiness of the split slices. The latter is only relevant when
937 // P.SI == SE, as the end iterator will additionally have an empty split
938 // slices list, but the prior may have the same P.SI and a tail of split
939 // slices.
940 if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
941 assert(P.SJ == RHS.P.SJ &&
942 "Same set of slices formed two different sized partitions!");
943 assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
944 "Same slice position with differently sized non-empty split "
945 "slice tails!");
946 return true;
947 }
948 return false;
949 }
950
951 partition_iterator &operator++() {
952 advance();
953 return *this;
954 }
955
956 Partition &operator*() { return P; }
957};
958
959/// A forward range over the partitions of the alloca's slices.
960///
961/// This accesses an iterator range over the partitions of the alloca's
962/// slices. It computes these partitions on the fly based on the overlapping
963/// offsets of the slices and the ability to split them. It will visit "empty"
964/// partitions to cover regions of the alloca only accessed via split
965/// slices.
966iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
967 return make_range(partition_iterator(begin(), end()),
968 partition_iterator(end(), end()));
969}
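// Illustrative usage sketch only (assumes a DataLayout DL and an AllocaInst
// AI are in scope); the partitions form an ordinary forward range:
//
//   AllocaSlices AS(DL, AI);
//   if (!AS.isEscaped())
//     for (Partition &P : AS.partitions())
//       dbgs() << "partition [" << P.beginOffset() << ", " << P.endOffset()
//              << ") with " << P.splitSliceTails().size() << " split tails\n";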
970
971static Value *foldSelectInst(SelectInst &SI) {
972 // If the condition being selected on is a constant or the same value is
973 // being selected between, fold the select. Yes this does (rarely) happen
974 // early on.
975 if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
976 return SI.getOperand(1 + CI->isZero());
977 if (SI.getOperand(1) == SI.getOperand(2))
978 return SI.getOperand(1);
979
980 return nullptr;
981}
982
983/// A helper that folds a PHI node or a select.
984static Value *foldPHINodeOrSelectInst(Instruction &I) {
985 if (PHINode *PN = dyn_cast<PHINode>(&I)) {
986 // If PN merges together the same value, return that value.
987 return PN->hasConstantValue();
988 }
989 return foldSelectInst(cast<SelectInst>(I));
990}
991
992/// Builder for the alloca slices.
993///
994/// This class builds a set of alloca slices by recursively visiting the uses
995/// of an alloca and making a slice for each load and store at each offset.
996class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
997 friend class PtrUseVisitor<SliceBuilder>;
998 friend class InstVisitor<SliceBuilder>;
999
1000 using Base = PtrUseVisitor<SliceBuilder>;
1001
1002 const uint64_t AllocSize;
1003 AllocaSlices &AS;
1004
1005 SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
1006 SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
1007
1008 /// Set to de-duplicate dead instructions found in the use walk.
1009 SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
1010
1011public:
1012 SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
1013 : PtrUseVisitor<SliceBuilder>(DL),
1014 AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),
1015 AS(AS) {}
1016
1017private:
1018 void markAsDead(Instruction &I) {
1019 if (VisitedDeadInsts.insert(&I).second)
1020 AS.DeadUsers.push_back(&I);
1021 }
1022
1023 void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
1024 bool IsSplittable = false) {
1025 // Completely skip uses which have a zero size or start either before or
1026 // past the end of the allocation.
1027 if (Size == 0 || Offset.uge(AllocSize)) {
1028 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @"
1029 << Offset
1030 << " which has zero size or starts outside of the "
1031 << AllocSize << " byte alloca:\n"
1032 << " alloca: " << AS.AI << "\n"
1033 << " use: " << I << "\n");
1034 return markAsDead(I);
1035 }
1036
1037 uint64_t BeginOffset = Offset.getZExtValue();
1038 uint64_t EndOffset = BeginOffset + Size;
1039
1040 // Clamp the end offset to the end of the allocation. Note that this is
1041 // formulated to handle even the case where "BeginOffset + Size" overflows.
1042 // This may appear superficially to be something we could ignore entirely,
1043 // but that is not so! There may be widened loads or PHI-node uses where
1044 // some instructions are dead but not others. We can't completely ignore
1045 // them, and so have to record at least the information here.
1046 assert(AllocSize >= BeginOffset); // Established above.
1047 if (Size > AllocSize - BeginOffset) {
1048 LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
1049 << Offset << " to remain within the " << AllocSize
1050 << " byte alloca:\n"
1051 << " alloca: " << AS.AI << "\n"
1052 << " use: " << I << "\n");
1053 EndOffset = AllocSize;
1054 }
1055
1056 AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
1057 }
1058
1059 void visitBitCastInst(BitCastInst &BC) {
1060 if (BC.use_empty())
1061 return markAsDead(BC);
1062
1063 return Base::visitBitCastInst(BC);
1064 }
1065
1066 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1067 if (ASC.use_empty())
1068 return markAsDead(ASC);
1069
1070 return Base::visitAddrSpaceCastInst(ASC);
1071 }
1072
1073 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1074 if (GEPI.use_empty())
1075 return markAsDead(GEPI);
1076
1077 return Base::visitGetElementPtrInst(GEPI);
1078 }
1079
1080 void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
1081 uint64_t Size, bool IsVolatile) {
1082 // We allow splitting of non-volatile loads and stores where the type is an
1083 // integer type. These may be used to implement 'memcpy' or other "transfer
1084 // of bits" patterns.
1085 bool IsSplittable =
1086 Ty->isIntegerTy() && !IsVolatile && DL.typeSizeEqualsStoreSize(Ty);
1087
1088 insertUse(I, Offset, Size, IsSplittable);
1089 }
1090
1091 void visitLoadInst(LoadInst &LI) {
1092 assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
1093 "All simple FCA loads should have been pre-split");
1094
1095 // If there is a load with an unknown offset, we can still perform store
1096 // to load forwarding for other known-offset loads.
1097 if (!IsOffsetKnown)
1098 return PI.setEscapedReadOnly(&LI);
1099
1100 TypeSize Size = DL.getTypeStoreSize(LI.getType());
1101 if (Size.isScalable()) {
1102 unsigned VScale = LI.getFunction()->getVScaleValue();
1103 if (!VScale)
1104 return PI.setAborted(&LI);
1105
1106 Size = TypeSize::getFixed(Size.getKnownMinValue() * VScale);
1107 }
1108
1109 return handleLoadOrStore(LI.getType(), LI, Offset, Size.getFixedValue(),
1110 LI.isVolatile());
1111 }
1112
1113 void visitStoreInst(StoreInst &SI) {
1114 Value *ValOp = SI.getValueOperand();
1115 if (ValOp == *U)
1116 return PI.setEscapedAndAborted(&SI);
1117 if (!IsOffsetKnown)
1118 return PI.setAborted(&SI);
1119
1120 TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
1121 if (StoreSize.isScalable()) {
1122 unsigned VScale = SI.getFunction()->getVScaleValue();
1123 if (!VScale)
1124 return PI.setAborted(&SI);
1125
1126 StoreSize = TypeSize::getFixed(StoreSize.getKnownMinValue() * VScale);
1127 }
1128
1129 uint64_t Size = StoreSize.getFixedValue();
1130
1131 // If this memory access can be shown to *statically* extend outside the
1132 // bounds of the allocation, its behavior is undefined, so simply
1133 // ignore it. Note that this is more strict than the generic clamping
1134 // behavior of insertUse. We also try to handle cases which might run the
1135 // risk of overflow.
1136 // FIXME: We should instead consider the pointer to have escaped if this
1137 // function is being instrumented for addressing bugs or race conditions.
1138 if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
1139 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @"
1140 << Offset << " which extends past the end of the "
1141 << AllocSize << " byte alloca:\n"
1142 << " alloca: " << AS.AI << "\n"
1143 << " use: " << SI << "\n");
1144 return markAsDead(SI);
1145 }
1146
1147 assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
1148 "All simple FCA stores should have been pre-split");
1149 handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
1150 }
1151
1152 void visitMemSetInst(MemSetInst &II) {
1153 assert(II.getRawDest() == *U && "Pointer use is not the destination?");
1154 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
1155 if ((Length && Length->getValue() == 0) ||
1156 (IsOffsetKnown && Offset.uge(AllocSize)))
1157 // Zero-length mem transfer intrinsics can be ignored entirely.
1158 return markAsDead(II);
1159
1160 if (!IsOffsetKnown)
1161 return PI.setAborted(&II);
1162
1163 insertUse(II, Offset,
1164 Length ? Length->getLimitedValue()
1165 : AllocSize - Offset.getLimitedValue(),
1166 (bool)Length);
1167 }
1168
1169 void visitMemTransferInst(MemTransferInst &II) {
1170 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
1171 if (Length && Length->getValue() == 0)
1172 // Zero-length mem transfer intrinsics can be ignored entirely.
1173 return markAsDead(II);
1174
1175 // Because we can visit these intrinsics twice, also check to see if the
1176 // first time marked this instruction as dead. If so, skip it.
1177 if (VisitedDeadInsts.count(&II))
1178 return;
1179
1180 if (!IsOffsetKnown)
1181 return PI.setAborted(&II);
1182
1183 // This side of the transfer is completely out-of-bounds, and so we can
1184 // nuke the entire transfer. However, we also need to nuke the other side
1185 // if already added to our partitions.
1186 // FIXME: Yet another place we really should bypass this when
1187 // instrumenting for ASan.
1188 if (Offset.uge(AllocSize)) {
1189 SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
1190 MemTransferSliceMap.find(&II);
1191 if (MTPI != MemTransferSliceMap.end())
1192 AS.Slices[MTPI->second].kill();
1193 return markAsDead(II);
1194 }
1195
1196 uint64_t RawOffset = Offset.getLimitedValue();
1197 uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;
1198
1199 // Check for the special case where the same exact value is used for both
1200 // source and dest.
1201 if (*U == II.getRawDest() && *U == II.getRawSource()) {
1202 // For non-volatile transfers this is a no-op.
1203 if (!II.isVolatile())
1204 return markAsDead(II);
1205
1206 return insertUse(II, Offset, Size, /*IsSplittable=*/false);
1207 }
1208
1209 // If we have seen both source and destination for a mem transfer, then
1210 // they both point to the same alloca.
1211 bool Inserted;
1212 SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
1213 std::tie(MTPI, Inserted) =
1214 MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
1215 unsigned PrevIdx = MTPI->second;
1216 if (!Inserted) {
1217 Slice &PrevP = AS.Slices[PrevIdx];
1218
1219 // Check if the begin offsets match and this is a non-volatile transfer.
1220 // In that case, we can completely elide the transfer.
1221 if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
1222 PrevP.kill();
1223 return markAsDead(II);
1224 }
1225
1226 // Otherwise we have an offset transfer within the same alloca. We can't
1227 // split those.
1228 PrevP.makeUnsplittable();
1229 }
1230
1231 // Insert the use now that we've fixed up the splittable nature.
1232 insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);
1233
1234 // Check that we ended up with a valid index in the map.
1235 assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
1236 "Map index doesn't point back to a slice with this user.");
1237 }
1238
1239 // Disable SRoA for any intrinsics except for lifetime invariants.
1240 // FIXME: What about debug intrinsics? This matches old behavior, but
1241 // doesn't make sense.
1242 void visitIntrinsicInst(IntrinsicInst &II) {
1243 if (II.isDroppable()) {
1244 AS.DeadUseIfPromotable.push_back(U);
1245 return;
1246 }
1247
1248 if (!IsOffsetKnown)
1249 return PI.setAborted(&II);
1250
1251 if (II.isLifetimeStartOrEnd()) {
1252 insertUse(II, Offset, AllocSize, true);
1253 return;
1254 }
1255
1256 Base::visitIntrinsicInst(II);
1257 }
1258
1259 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
1260 // We consider any PHI or select that results in a direct load or store of
1261 // the same offset to be a viable use for slicing purposes. These uses
1262 // are considered unsplittable and the size is the maximum loaded or stored
1263 // size.
1264 SmallPtrSet<Instruction *, 4> Visited;
1265 SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
1266 Visited.insert(Root);
1267 Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
1268 const DataLayout &DL = Root->getDataLayout();
1269 // If there are no loads or stores, the access is dead. We mark that as
1270 // a size zero access.
1271 Size = 0;
1272 do {
1273 Instruction *I, *UsedI;
1274 std::tie(UsedI, I) = Uses.pop_back_val();
1275
1276 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1277 TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
1278 if (LoadSize.isScalable()) {
1279 PI.setAborted(LI);
1280 return nullptr;
1281 }
1282 Size = std::max(Size, LoadSize.getFixedValue());
1283 continue;
1284 }
1285 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
1286 Value *Op = SI->getOperand(0);
1287 if (Op == UsedI)
1288 return SI;
1289 TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());
1290 if (StoreSize.isScalable()) {
1291 PI.setAborted(SI);
1292 return nullptr;
1293 }
1294 Size = std::max(Size, StoreSize.getFixedValue());
1295 continue;
1296 }
1297
1298 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
1299 if (!GEP->hasAllZeroIndices())
1300 return GEP;
1301 } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
1302 !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
1303 return I;
1304 }
1305
1306 for (User *U : I->users())
1307 if (Visited.insert(cast<Instruction>(U)).second)
1308 Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
1309 } while (!Uses.empty());
1310
1311 return nullptr;
1312 }
1313
1314 void visitPHINodeOrSelectInst(Instruction &I) {
1315 assert(isa<PHINode>(I) || isa<SelectInst>(I));
1316 if (I.use_empty())
1317 return markAsDead(I);
1318
1319 // If this is a PHI node before a catchswitch, we cannot insert any non-PHI
1320 // instructions in this BB, which may be required during rewriting. Bail out
1321 // on these cases.
1322 if (isa<PHINode>(I) &&
1323 I.getParent()->getFirstInsertionPt() == I.getParent()->end())
1324 return PI.setAborted(&I);
1325
1326 // TODO: We could use simplifyInstruction here to fold PHINodes and
1327 // SelectInsts. However, doing so requires to change the current
1328 // dead-operand-tracking mechanism. For instance, suppose neither loading
1329 // from %U nor %other traps. Then "load (select undef, %U, %other)" does not
1330 // trap either. However, if we simply replace %U with undef using the
1331 // current dead-operand-tracking mechanism, "load (select undef, undef,
1332 // %other)" may trap because the select may return the first operand
1333 // "undef".
1334 if (Value *Result = foldPHINodeOrSelectInst(I)) {
1335 if (Result == *U)
1336 // If the result of the constant fold will be the pointer, recurse
1337 // through the PHI/select as if we had RAUW'ed it.
1338 enqueueUsers(I);
1339 else
1340 // Otherwise the operand to the PHI/select is dead, and we can replace
1341 // it with poison.
1342 AS.DeadOperands.push_back(U);
1343
1344 return;
1345 }
1346
1347 if (!IsOffsetKnown)
1348 return PI.setAborted(&I);
1349
1350 // See if we already have computed info on this node.
1351 uint64_t &Size = PHIOrSelectSizes[&I];
1352 if (!Size) {
1353 // This is a new PHI/Select, check for an unsafe use of it.
1354 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
1355 return PI.setAborted(UnsafeI);
1356 }
1357
1358 // For PHI and select operands outside the alloca, we can't nuke the entire
1359 // phi or select -- the other side might still be relevant, so we special
1360 // case them here and use a separate structure to track the operands
1361 // themselves which should be replaced with poison.
1362 // FIXME: This should instead be escaped in the event we're instrumenting
1363 // for address sanitization.
1364 if (Offset.uge(AllocSize)) {
1365 AS.DeadOperands.push_back(U);
1366 return;
1367 }
1368
1369 insertUse(I, Offset, Size);
1370 }
1371
1372 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1373
1374 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1375
1376 /// Disable SROA entirely if there are unhandled users of the alloca.
1377 void visitInstruction(Instruction &I) { PI.setAborted(&I); }
1378
1379 void visitCallBase(CallBase &CB) {
1380 // If the call operand is read-only and only does a read-only or address
1381 // capture, then we mark it as EscapedReadOnly.
1382 if (CB.isDataOperand(U) &&
1383 !capturesFullProvenance(CB.getCaptureInfo(U->getOperandNo())) &&
1384 CB.onlyReadsMemory(U->getOperandNo())) {
1385 PI.setEscapedReadOnly(&CB);
1386 return;
1387 }
1388
1389 Base::visitCallBase(CB);
1390 }
1391};
1392
1393AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
1394 :
1395#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1396 AI(AI),
1397#endif
1398 PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
1399 SliceBuilder PB(DL, AI, *this);
1400 SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
1401 if (PtrI.isEscaped() || PtrI.isAborted()) {
1402 // FIXME: We should sink the escape vs. abort info into the caller nicely,
1403 // possibly by just storing the PtrInfo in the AllocaSlices.
1404 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1405 : PtrI.getAbortingInst();
1406 assert(PointerEscapingInstr && "Did not track a bad instruction");
1407 return;
1408 }
1409 PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();
1410
1411 llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
1412
1413 // Sort the uses. This arranges for the offsets to be in ascending order,
1414 // and the sizes to be in descending order.
1415 llvm::stable_sort(Slices);
1416}
1417
1418#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1419
1420void AllocaSlices::print(raw_ostream &OS, const_iterator I,
1421 StringRef Indent) const {
1422 printSlice(OS, I, Indent);
1423 OS << "\n";
1424 printUse(OS, I, Indent);
1425}
1426
1427void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
1428 StringRef Indent) const {
1429 OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
1430 << " slice #" << (I - begin())
1431 << (I->isSplittable() ? " (splittable)" : "");
1432}
1433
1434void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
1435 StringRef Indent) const {
1436 OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
1437}
1438
1439void AllocaSlices::print(raw_ostream &OS) const {
1440 if (PointerEscapingInstr) {
1441 OS << "Can't analyze slices for alloca: " << AI << "\n"
1442 << " A pointer to this alloca escaped by:\n"
1443 << " " << *PointerEscapingInstr << "\n";
1444 return;
1445 }
1446
1447 if (PointerEscapingInstrReadOnly)
1448 OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";
1449
1450 OS << "Slices of alloca: " << AI << "\n";
1451 for (const_iterator I = begin(), E = end(); I != E; ++I)
1452 print(OS, I);
1453}
1454
1455LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
1456 print(dbgs(), I);
1457}
1458LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
1459
1460#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1461
1462/// Walk the range of a partitioning looking for a common type to cover this
1463/// sequence of slices.
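/// For example (illustrative), if every slice spanning the whole partition is
/// accessed as float, the common type is float; if the accesses mix float and
/// i32 there is no common type (nullptr), but i32 is still returned as the
/// widest byte-width integer type seen, which the caller may use as a
/// bitfield-style integer scalar.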
1464static std::pair<Type *, IntegerType *>
1465findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
1466 uint64_t EndOffset) {
1467 Type *Ty = nullptr;
1468 bool TyIsCommon = true;
1469 IntegerType *ITy = nullptr;
1470
1471 // Note that we need to look at *every* alloca slice's Use to ensure we
1472 // always get consistent results regardless of the order of slices.
1473 for (AllocaSlices::const_iterator I = B; I != E; ++I) {
1474 Use *U = I->getUse();
1475 if (isa<IntrinsicInst>(*U->getUser()))
1476 continue;
1477 if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
1478 continue;
1479
1480 Type *UserTy = nullptr;
1481 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1482 UserTy = LI->getType();
1483 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1484 UserTy = SI->getValueOperand()->getType();
1485 }
1486
1487 if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
1488 // If the type is larger than the partition, skip it. We only encounter
1489 // this for split integer operations where we want to use the type of the
1490 // entity causing the split. Also skip if the type is not a byte width
1491 // multiple.
1492 if (UserITy->getBitWidth() % 8 != 0 ||
1493 UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
1494 continue;
1495
1496 // Track the largest bitwidth integer type used in this way in case there
1497 // is no common type.
1498 if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
1499 ITy = UserITy;
1500 }
1501
1502 // To avoid depending on the order of slices, Ty and TyIsCommon must not
1503 // depend on types skipped above.
1504 if (!UserTy || (Ty && Ty != UserTy))
1505 TyIsCommon = false; // Give up on anything but an iN type.
1506 else
1507 Ty = UserTy;
1508 }
1509
1510 return {TyIsCommon ? Ty : nullptr, ITy};
1511}
1512
1513/// PHI instructions that use an alloca and are subsequently loaded can be
1514/// rewritten to load both input pointers in the pred blocks and then PHI the
1515/// results, allowing the load of the alloca to be promoted.
1516/// From this:
1517/// %P2 = phi [i32* %Alloca, i32* %Other]
1518/// %V = load i32* %P2
1519/// to:
1520/// %V1 = load i32* %Alloca -> will be mem2reg'd
1521/// ...
1522/// %V2 = load i32* %Other
1523/// ...
1524/// %V = phi [i32 %V1, i32 %V2]
1525///
1526/// We can do this to a select if its only uses are loads and if the operands
1527/// to the select can be loaded unconditionally.
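///
/// Schematically (this select example is analogous to the PHI case above and
/// uses invented value names), the rewrite turns:
///   %P2 = select i1 %cond, i32* %Alloca, i32* %Other
///   %V = load i32* %P2
/// into:
///   %V1 = load i32* %Alloca -> will be mem2reg'd
///   %V2 = load i32* %Other
///   %V = select i1 %cond, i32 %V1, i32 %V2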
1528///
1529/// FIXME: This should be hoisted into a generic utility, likely in
1530/// Transforms/Util/Local.h
1531static bool isSafePHIToSpeculate(PHINode &PN) {
1532 const DataLayout &DL = PN.getDataLayout();
1533
1534 // For now, we can only do this promotion if the load is in the same block
1535 // as the PHI, and if there are no stores between the phi and load.
1536 // TODO: Allow recursive phi users.
1537 // TODO: Allow stores.
1538 BasicBlock *BB = PN.getParent();
1539 Align MaxAlign;
1540 uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
1541 Type *LoadType = nullptr;
1542 for (User *U : PN.users()) {
1543 LoadInst *LI = dyn_cast<LoadInst>(U);
1544 if (!LI || !LI->isSimple())
1545 return false;
1546
1547 // For now we only allow loads in the same block as the PHI. This is
1548 // a common case that happens when instcombine merges two loads through
1549 // a PHI.
1550 if (LI->getParent() != BB)
1551 return false;
1552
1553 if (LoadType) {
1554 if (LoadType != LI->getType())
1555 return false;
1556 } else {
1557 LoadType = LI->getType();
1558 }
1559
1560 // Ensure that there are no instructions between the PHI and the load that
1561 // could store.
1562 for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
1563 if (BBI->mayWriteToMemory())
1564 return false;
1565
1566 MaxAlign = std::max(MaxAlign, LI->getAlign());
1567 }
1568
1569 if (!LoadType)
1570 return false;
1571
1572 APInt LoadSize =
1573 APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());
1574
1575 // We can only transform this if it is safe to push the loads into the
1576 // predecessor blocks. The only thing to watch out for is that we can't put
1577 // a possibly trapping load in the predecessor if it is a critical edge.
1578 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1579 Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
1580 Value *InVal = PN.getIncomingValue(Idx);
1581
1582 // If the value is produced by the terminator of the predecessor (an
1583 // invoke) or it has side-effects, there is no valid place to put a load
1584 // in the predecessor.
1585 if (TI == InVal || TI->mayHaveSideEffects())
1586 return false;
1587
1588 // If the predecessor has a single successor, then the edge isn't
1589 // critical.
1590 if (TI->getNumSuccessors() == 1)
1591 continue;
1592
1593 // If this pointer is always safe to load, or if we can prove that there
1594 // is already a load in the block, then we can move the load to the pred
1595 // block.
1596 if (isSafeToLoadUnconditionally(InVal, MaxAlign, LoadSize, DL, TI))
1597 continue;
1598
1599 return false;
1600 }
1601
1602 return true;
1603}
1604
1605static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN) {
1606 LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
1607
1608 LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
1609 Type *LoadTy = SomeLoad->getType();
1610 IRB.SetInsertPoint(&PN);
1611 PHINode *NewPN = IRB.CreatePHI(LoadTy, PN.getNumIncomingValues(),
1612 PN.getName() + ".sroa.speculated");
1613
1614 // Get the AA tags and alignment to use from one of the loads. It does not
1615 // matter which one we get or whether any of them differ.
1616 AAMDNodes AATags = SomeLoad->getAAMetadata();
1617 Align Alignment = SomeLoad->getAlign();
1618
1619 // Rewrite all loads of the PN to use the new PHI.
1620 while (!PN.use_empty()) {
1621 LoadInst *LI = cast<LoadInst>(PN.user_back());
1622 LI->replaceAllUsesWith(NewPN);
1623 LI->eraseFromParent();
1624 }
1625
1626 // Inject loads into all of the pred blocks.
1627 DenseMap<BasicBlock *, Value *> InjectedLoads;
1628 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1629 BasicBlock *Pred = PN.getIncomingBlock(Idx);
1630 Value *InVal = PN.getIncomingValue(Idx);
1631
1632 // A PHI node is allowed to have multiple (duplicated) entries for the same
1633 // basic block, as long as the value is the same. So if we already injected
1634 // a load in the predecessor, then we should reuse the same load for all
1635 // duplicated entries.
1636 if (Value *V = InjectedLoads.lookup(Pred)) {
1637 NewPN->addIncoming(V, Pred);
1638 continue;
1639 }
1640
1641 Instruction *TI = Pred->getTerminator();
1642 IRB.SetInsertPoint(TI);
1643
1644 LoadInst *Load = IRB.CreateAlignedLoad(
1645 LoadTy, InVal, Alignment,
1646 (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
1647 ++NumLoadsSpeculated;
1648 if (AATags)
1649 Load->setAAMetadata(AATags);
1650 NewPN->addIncoming(Load, Pred);
1651 InjectedLoads[Pred] = Load;
1652 }
1653
1654 LLVM_DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
1655 PN.eraseFromParent();
1656}
1657
1658SelectHandSpeculativity &
1659SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {
1660 if (isTrueVal)
1661 Bitfield::set<SelectHandSpeculativity::TrueVal>(Storage, true);
1662 else
1663 Bitfield::set<SelectHandSpeculativity::FalseVal>(Storage, true);
1664 return *this;
1665}
1666
1667bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {
1668 return isTrueVal ? Bitfield::get<SelectHandSpeculativity::TrueVal>(Storage)
1669 : Bitfield::get<SelectHandSpeculativity::FalseVal>(Storage);
1670}
1671
1672bool SelectHandSpeculativity::areAllSpeculatable() const {
1673 return isSpeculatable(/*isTrueVal=*/true) &&
1674 isSpeculatable(/*isTrueVal=*/false);
1675}
1676
1677bool SelectHandSpeculativity::areAnySpeculatable() const {
1678 return isSpeculatable(/*isTrueVal=*/true) ||
1679 isSpeculatable(/*isTrueVal=*/false);
1680}
1681bool SelectHandSpeculativity::areNoneSpeculatable() const {
1682 return !areAnySpeculatable();
1683}
1684
1685static SelectHandSpeculativity
1686isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG) {
1687 assert(LI.isSimple() && "Only for simple loads");
1688 SelectHandSpeculativity Spec;
1689
1690 const DataLayout &DL = SI.getDataLayout();
1691 for (Value *Value : {SI.getTrueValue(), SI.getFalseValue()})
1692 if (isSafeToLoadUnconditionally(Value, LI.getType(), LI.getAlign(), DL,
1693 &LI))
1694 Spec.setAsSpeculatable(/*isTrueVal=*/Value == SI.getTrueValue());
1695 else if (PreserveCFG)
1696 return Spec;
1697
1698 return Spec;
1699}
1700
1701std::optional<RewriteableMemOps>
1702SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
1703 RewriteableMemOps Ops;
1704
1705 for (User *U : SI.users()) {
1706 if (auto *BC = dyn_cast<BitCastInst>(U); BC && BC->hasOneUse())
1707 U = *BC->user_begin();
1708
1709 if (auto *Store = dyn_cast<StoreInst>(U)) {
1710 // Note that atomic stores can be transformed; atomic semantics do not
1711 // have any meaning for a local alloca. Stores are not speculatable,
1712 // however, so if we can't turn it into a predicated store, we are done.
1713 if (Store->isVolatile() || PreserveCFG)
1714 return {}; // Give up on this `select`.
1715 Ops.emplace_back(Store);
1716 continue;
1717 }
1718
1719 auto *LI = dyn_cast<LoadInst>(U);
1720
1721 // Note that atomic loads can be transformed;
1722 // atomic semantics do not have any meaning for a local alloca.
1723 if (!LI || LI->isVolatile())
1724 return {}; // Give up on this `select`.
1725
1726 PossiblySpeculatableLoad Load(LI);
1727 if (!LI->isSimple()) {
1728 // If the `load` is not simple, we can't speculatively execute it,
1729 // but we could handle this via a CFG modification. But can we?
1730 if (PreserveCFG)
1731 return {}; // Give up on this `select`.
1732 Ops.emplace_back(Load);
1733 continue;
1734 }
1735
1736 SelectHandSpeculativity Spec =
1737 isSafeLoadOfSelectToSpeculate(*LI, SI, PreserveCFG);
1738 if (PreserveCFG && !Spec.areAllSpeculatable())
1739 return {}; // Give up on this `select`.
1740
1741 Load.setInt(Spec);
1742 Ops.emplace_back(Load);
1743 }
1744
1745 return Ops;
1746}
1747
1748static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
1749 IRBuilderTy &IRB) {
1750 LLVM_DEBUG(dbgs() << " original load: " << SI << "\n");
1751
1752 Value *TV = SI.getTrueValue();
1753 Value *FV = SI.getFalseValue();
1754 // Replace the given load of the select with a select of two loads.
1755
1756 assert(LI.isSimple() && "We only speculate simple loads");
1757
1758 IRB.SetInsertPoint(&LI);
1759
1760 LoadInst *TL =
1761 IRB.CreateAlignedLoad(LI.getType(), TV, LI.getAlign(),
1762 LI.getName() + ".sroa.speculate.load.true");
1763 LoadInst *FL =
1764 IRB.CreateAlignedLoad(LI.getType(), FV, LI.getAlign(),
1765 LI.getName() + ".sroa.speculate.load.false");
1766 NumLoadsSpeculated += 2;
1767
1768 // Transfer alignment and AA info if present.
1769 TL->setAlignment(LI.getAlign());
1770 FL->setAlignment(LI.getAlign());
1771
1772 AAMDNodes Tags = LI.getAAMetadata();
1773 if (Tags) {
1774 TL->setAAMetadata(Tags);
1775 FL->setAAMetadata(Tags);
1776 }
1777
1778 Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
1779 LI.getName() + ".sroa.speculated");
1780
1781 LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
1782 LI.replaceAllUsesWith(V);
1783}
1784
1785template <typename T>
1786static void rewriteMemOpOfSelect(SelectInst &SI, T &I,
1787 SelectHandSpeculativity Spec,
1788 DomTreeUpdater &DTU) {
1789 assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Only for load and store!");
1790 LLVM_DEBUG(dbgs() << " original mem op: " << I << "\n");
1791 BasicBlock *Head = I.getParent();
1792 Instruction *ThenTerm = nullptr;
1793 Instruction *ElseTerm = nullptr;
1794 if (Spec.areNoneSpeculatable())
1795 SplitBlockAndInsertIfThenElse(SI.getCondition(), &I, &ThenTerm, &ElseTerm,
1796 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1797 else {
1798 SplitBlockAndInsertIfThen(SI.getCondition(), &I, /*Unreachable=*/false,
1799 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1800 /*LI=*/nullptr, /*ThenBlock=*/nullptr);
1801 if (Spec.isSpeculatable(/*isTrueVal=*/true))
1802 cast<BranchInst>(Head->getTerminator())->swapSuccessors();
1803 }
1804 auto *HeadBI = cast<BranchInst>(Head->getTerminator());
1805 Spec = {}; // Do not use `Spec` beyond this point.
1806 BasicBlock *Tail = I.getParent();
1807 Tail->setName(Head->getName() + ".cont");
1808 PHINode *PN;
1809 if (isa<LoadInst>(I))
1810 PN = PHINode::Create(I.getType(), 2, "", I.getIterator());
1811 for (BasicBlock *SuccBB : successors(Head)) {
1812 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1813 int SuccIdx = IsThen ? 0 : 1;
1814 auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
1815 auto &CondMemOp = cast<T>(*I.clone());
1816 if (NewMemOpBB != Head) {
1817 NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
1818 if (isa<LoadInst>(I))
1819 ++NumLoadsPredicated;
1820 else
1821 ++NumStoresPredicated;
1822 } else {
1823 CondMemOp.dropUBImplyingAttrsAndMetadata();
1824 ++NumLoadsSpeculated;
1825 }
1826 CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
1827 Value *Ptr = SI.getOperand(1 + SuccIdx);
1828 CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
1829 if (isa<LoadInst>(I)) {
1830 CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");
1831 PN->addIncoming(&CondMemOp, NewMemOpBB);
1832 } else
1833 LLVM_DEBUG(dbgs() << " to: " << CondMemOp << "\n");
1834 }
1835 if (isa<LoadInst>(I)) {
1836 PN->takeName(&I);
1837 LLVM_DEBUG(dbgs() << " to: " << *PN << "\n");
1838 I.replaceAllUsesWith(PN);
1839 }
1840}
1841
1842static void rewriteMemOpOfSelect(SelectInst &SelInst, Instruction &I,
1843 SelectHandSpeculativity Spec,
1844 DomTreeUpdater &DTU) {
1845 if (auto *LI = dyn_cast<LoadInst>(&I))
1846 rewriteMemOpOfSelect(SelInst, *LI, Spec, DTU);
1847 else if (auto *SI = dyn_cast<StoreInst>(&I))
1848 rewriteMemOpOfSelect(SelInst, *SI, Spec, DTU);
1849 else
1850 llvm_unreachable_internal("Only for load and store.");
1851}
1852
1853static bool rewriteSelectInstMemOps(SelectInst &SI,
1854 const RewriteableMemOps &Ops,
1855 IRBuilderTy &IRB, DomTreeUpdater *DTU) {
1856 bool CFGChanged = false;
1857 LLVM_DEBUG(dbgs() << " original select: " << SI << "\n");
1858
1859 for (const RewriteableMemOp &Op : Ops) {
1860 SelectHandSpeculativity Spec;
1861 Instruction *I;
1862 if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
1863 I = *US;
1864 } else {
1865 auto PSL = std::get<PossiblySpeculatableLoad>(Op);
1866 I = PSL.getPointer();
1867 Spec = PSL.getInt();
1868 }
1869 if (Spec.areAllSpeculatable()) {
1870 speculateSelectInstLoads(SI, cast<LoadInst>(*I), IRB);
1871 } else {
1872 assert(DTU && "Should not get here when not allowed to modify the CFG!");
1873 rewriteMemOpOfSelect(SI, *I, Spec, *DTU);
1874 CFGChanged = true;
1875 }
1876 I->eraseFromParent();
1877 }
1878
1879 for (User *U : make_early_inc_range(SI.users()))
1880 cast<BitCastInst>(U)->eraseFromParent();
1881 SI.eraseFromParent();
1882 return CFGChanged;
1883}
1884
1885/// Compute an adjusted pointer from Ptr by Offset bytes where the
1886/// resulting pointer has PointerTy.
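///
/// Roughly (illustrative names and address space), adjusting %ptr by 8 bytes
/// emits:
///   %x.sroa_idx = getelementptr inbounds i8, ptr %ptr, i64 8
///   %x.sroa_cast = addrspacecast ptr %x.sroa_idx to ptr addrspace(5)
/// with the trailing cast folded away when the pointer type already matches.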
1887static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
1888 APInt Offset, Type *PointerTy,
1889 const Twine &NamePrefix) {
1890 if (Offset != 0)
1891 Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset),
1892 NamePrefix + "sroa_idx");
1893 return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
1894 NamePrefix + "sroa_cast");
1895}
1896
1897/// Compute the adjusted alignment for a load or store from an offset.
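/// For example (illustrative): an access with alignment 8 that lands at byte
/// offset 4 of its slice ends up with an adjusted alignment of 4, i.e. the
/// common alignment of the original alignment and the offset.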
1898static Align getAdjustedAlignment(Instruction *I, uint64_t Offset) {
1899 return commonAlignment(getLoadStoreAlignment(I), Offset);
1900}
1901
1902/// Test whether we can convert a value from the old to the new type.
1903///
1904/// This predicate should be used to guard calls to convertValue in order to
1905/// ensure that we only try to convert viable values. The strategy is that we
1906/// will peel off single element struct and array wrappings to get to an
1907/// underlying value, and convert that value.
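///
/// For instance (illustrative): i64 <-> double, i64 <-> <2 x i32>, and
/// i64 <-> ptr (in an integral address space) are all convertible because the
/// bit sizes match and each side is a single-value type, while i32 <-> i64 or
/// anything involving a non-integral pointer is rejected.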
1908static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy,
1909 unsigned VScale = 0) {
1910 if (OldTy == NewTy)
1911 return true;
1912
1913 // For integer types, we can't handle any bit-width differences. This would
1914 // break both vector conversions with extension and introduce endianness
1915 // issues when used in conjunction with loads and stores.
1916 if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
1917 assert(cast<IntegerType>(OldTy)->getBitWidth() !=
1918 cast<IntegerType>(NewTy)->getBitWidth() &&
1919 "We can't have the same bitwidth for different int types");
1920 return false;
1921 }
1922
1923 TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
1924 TypeSize OldSize = DL.getTypeSizeInBits(OldTy);
1925
1926 if ((isa<ScalableVectorType>(NewTy) && isa<FixedVectorType>(OldTy)) ||
1927 (isa<ScalableVectorType>(OldTy) && isa<FixedVectorType>(NewTy))) {
1928 // Conversion is only possible when the size of scalable vectors is known.
1929 if (!VScale)
1930 return false;
1931
1932 // For ptr-to-int and int-to-ptr casts, the pointer side is resolved within
1933 // a single domain (either fixed or scalable). Any additional conversion
1934 // between fixed and scalable types is handled through integer types.
1935 auto OldVTy = OldTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(OldTy) : OldTy;
1936 auto NewVTy = NewTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(NewTy) : NewTy;
1937
1938 if (isa<ScalableVectorType>(NewTy)) {
1939 if (!VectorType::getWithSizeAndScalar(cast<VectorType>(NewVTy), OldVTy))
1940 return false;
1941
1942 NewSize = TypeSize::getFixed(NewSize.getKnownMinValue() * VScale);
1943 } else {
1944 if (!VectorType::getWithSizeAndScalar(cast<VectorType>(OldVTy), NewVTy))
1945 return false;
1946
1947 OldSize = TypeSize::getFixed(OldSize.getKnownMinValue() * VScale);
1948 }
1949 }
1950
1951 if (NewSize != OldSize)
1952 return false;
1953 if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
1954 return false;
1955
1956 // We can convert pointers to integers and vice-versa. Same for vectors
1957 // of pointers and integers.
1958 OldTy = OldTy->getScalarType();
1959 NewTy = NewTy->getScalarType();
1960 if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
1961 if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
1962 unsigned OldAS = OldTy->getPointerAddressSpace();
1963 unsigned NewAS = NewTy->getPointerAddressSpace();
1964 // Convert pointers if they are pointers from the same address space or
1965 // different integral (not non-integral) address spaces with the same
1966 // pointer size.
1967 return OldAS == NewAS ||
1968 (!DL.isNonIntegralAddressSpace(OldAS) &&
1969 !DL.isNonIntegralAddressSpace(NewAS) &&
1970 DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
1971 }
1972
1973 // We can convert integers to integral pointers, but not to non-integral
1974 // pointers.
1975 if (OldTy->isIntegerTy())
1976 return !DL.isNonIntegralPointerType(NewTy);
1977
1978 // We can convert integral pointers to integers, but non-integral pointers
1979 // need to remain pointers.
1980 if (!DL.isNonIntegralPointerType(OldTy))
1981 return NewTy->isIntegerTy();
1982
1983 return false;
1984 }
1985
1986 if (OldTy->isTargetExtTy() || NewTy->isTargetExtTy())
1987 return false;
1988
1989 return true;
1990}
1991
1992/// Generic routine to convert an SSA value to a value of a different
1993/// type.
1994///
1995/// This will try various different casting techniques, such as bitcasts,
1996/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
1997/// two types for viability with this routine.
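///
/// For example (schematic): converting i64 to ptr in an integral address
/// space emits a single inttoptr, and converting a pointer between two
/// integral address spaces of equal size goes through a ptrtoint/inttoptr
/// pair rather than an addrspacecast.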
1998static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
1999 Type *NewTy) {
2000 Type *OldTy = V->getType();
2001
2002#ifndef NDEBUG
2003 BasicBlock *BB = IRB.GetInsertBlock();
2004 assert(BB && BB->getParent() && "VScale unknown!");
2005 unsigned VScale = BB->getParent()->getVScaleValue();
2006 assert(canConvertValue(DL, OldTy, NewTy, VScale) &&
2007 "Value not convertable to type");
2008#endif
2009
2010 if (OldTy == NewTy)
2011 return V;
2012
2013 assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
2014 "Integer types must be the exact same to convert.");
2015
2016 // A variant of bitcast that supports a mixture of fixed and scalable types
2017 // that are known to have the same size.
2018 auto CreateBitCastLike = [&IRB](Value *In, Type *Ty) -> Value * {
2019 Type *InTy = In->getType();
2020 if (InTy == Ty)
2021 return In;
2022
2023 if (isa<FixedVectorType>(InTy) && isa<ScalableVectorType>(Ty)) {
2024 // For vscale_range(2) expand <4 x i32> to <vscale x 4 x i16> -->
2025 // <4 x i32> to <vscale x 2 x i32> to <vscale x 4 x i16>
2026 auto *VTy = VectorType::getWithSizeAndScalar(cast<VectorType>(Ty), InTy);
2027 return IRB.CreateBitCast(IRB.CreateInsertVector(VTy,
2028 PoisonValue::get(VTy), In,
2029 IRB.getInt64(0)),
2030 Ty);
2031 }
2032
2033 if (isa<ScalableVectorType>(InTy) && isa<FixedVectorType>(Ty)) {
2034 // For vscale_range(2) expand <vscale x 4 x i16> to <4 x i32> -->
2035 // <vscale x 4 x i16> to <vscale x 2 x i32> to <4 x i32>
2036 auto *VTy = VectorType::getWithSizeAndScalar(cast<VectorType>(InTy), Ty);
2037 return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy),
2038 IRB.getInt64(0));
2039 }
2040
2041 return IRB.CreateBitCast(In, Ty);
2042 };
2043
2044 // See if we need inttoptr for this type pair. May require additional bitcast.
2045 if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
2046 // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
2047 // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
2048 // Expand <4 x i32> to <2 x i8*> --> <4 x i32> to <2 x i64> to <2 x i8*>
2049 // Directly handle i64 to i8*
2050 return IRB.CreateIntToPtr(CreateBitCastLike(V, DL.getIntPtrType(NewTy)),
2051 NewTy);
2052 }
2053
2054 // See if we need ptrtoint for this type pair. May require additional bitcast.
2055 if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) {
2056 // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
2057 // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
2058 // Expand <2 x i8*> to <4 x i32> --> <2 x i8*> to <2 x i64> to <4 x i32>
2059 // Expand i8* to i64 --> i8* to i64 to i64
2060 return CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
2061 NewTy);
2062 }
2063
2064 if (OldTy->isPtrOrPtrVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
2065 unsigned OldAS = OldTy->getPointerAddressSpace();
2066 unsigned NewAS = NewTy->getPointerAddressSpace();
2067 // To convert pointers between different address spaces (already checked to
2068 // be convertible, i.e. they have the same pointer size), we cannot use
2069 // `bitcast` (which requires both operands to be in the same address space)
2070 // or `addrspacecast` (which is not always a no-op cast). Instead, use a
2071 // pair of no-op `ptrtoint`/`inttoptr` casts through an integer with the
2072 // same bit size.
2073 if (OldAS != NewAS) {
2074 assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
2075 return IRB.CreateIntToPtr(
2076 CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
2077 DL.getIntPtrType(NewTy)),
2078 NewTy);
2079 }
2080 }
2081
2082 return CreateBitCastLike(V, NewTy);
2083}
2084
2085/// Test whether the given slice use can be promoted to a vector.
2086///
2087/// This function is called to test each entry in a partition which is slated
2088/// for a single slice.
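///
/// For example (illustrative): against a candidate type <4 x i32>, a slice
/// covering bytes [4, 12) maps exactly onto elements 1 and 2 and is viable if
/// its load or store can be converted to <2 x i32> (or to i64 when the slice
/// extends past the partition), whereas a slice covering bytes [2, 6) is
/// rejected because it is not element-aligned.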
2089static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
2090 VectorType *Ty,
2091 uint64_t ElementSize,
2092 const DataLayout &DL,
2093 unsigned VScale) {
2094 // First validate the slice offsets.
2095 uint64_t BeginOffset =
2096 std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
2097 uint64_t BeginIndex = BeginOffset / ElementSize;
2098 if (BeginIndex * ElementSize != BeginOffset ||
2099 BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
2100 return false;
2101 uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
2102 uint64_t EndIndex = EndOffset / ElementSize;
2103 if (EndIndex * ElementSize != EndOffset ||
2104 EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
2105 return false;
2106
2107 assert(EndIndex > BeginIndex && "Empty vector!");
2108 uint64_t NumElements = EndIndex - BeginIndex;
2109 Type *SliceTy = (NumElements == 1)
2110 ? Ty->getElementType()
2111 : FixedVectorType::get(Ty->getElementType(), NumElements);
2112
2113 Type *SplitIntTy =
2114 Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
2115
2116 Use *U = S.getUse();
2117
2118 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2119 if (MI->isVolatile())
2120 return false;
2121 if (!S.isSplittable())
2122 return false; // Skip any unsplittable intrinsics.
2123 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2124 if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
2125 return false;
2126 } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2127 if (LI->isVolatile())
2128 return false;
2129 Type *LTy = LI->getType();
2130 // Disable vector promotion when there are loads or stores of an FCA.
2131 if (LTy->isStructTy())
2132 return false;
2133 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2134 assert(LTy->isIntegerTy());
2135 LTy = SplitIntTy;
2136 }
2137 if (!canConvertValue(DL, SliceTy, LTy, VScale))
2138 return false;
2139 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2140 if (SI->isVolatile())
2141 return false;
2142 Type *STy = SI->getValueOperand()->getType();
2143 // Disable vector promotion when there are loads or stores of an FCA.
2144 if (STy->isStructTy())
2145 return false;
2146 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2147 assert(STy->isIntegerTy());
2148 STy = SplitIntTy;
2149 }
2150 if (!canConvertValue(DL, STy, SliceTy, VScale))
2151 return false;
2152 } else {
2153 return false;
2154 }
2155
2156 return true;
2157}
2158
2159/// Test whether a vector type is viable for promotion.
2160///
2161/// This implements the necessary checking for \c checkVectorTypesForPromotion
2162/// (and thus isVectorPromotionViable) over all slices of the alloca for the
2163/// given VectorType.
2164static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy,
2165 const DataLayout &DL, unsigned VScale) {
2166 uint64_t ElementSize =
2167 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2168
2169 // While the definition of LLVM vectors is bit-packed, we don't support
2170 // element sizes that aren't a whole number of bytes.
2171 if (ElementSize % 8)
2172 return false;
2173 assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
2174 "vector size not a multiple of element size?");
2175 ElementSize /= 8;
2176
2177 for (const Slice &S : P)
2178 if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL, VScale))
2179 return false;
2180
2181 for (const Slice *S : P.splitSliceTails())
2182 if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL, VScale))
2183 return false;
2184
2185 return true;
2186}
2187
2188/// Test whether any vector type in \p CandidateTys is viable for promotion.
2189///
2190/// This implements the necessary checking for \c isVectorPromotionViable over
2191/// all slices of the alloca for the given VectorType.
2192static VectorType *
2193checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
2194 SmallVectorImpl<VectorType *> &CandidateTys,
2195 bool HaveCommonEltTy, Type *CommonEltTy,
2196 bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
2197 VectorType *CommonVecPtrTy, unsigned VScale) {
2198 // If we didn't find a vector type, nothing to do here.
2199 if (CandidateTys.empty())
2200 return nullptr;
2201
2202 // Pointer-ness is sticky: if we had a vector-of-pointers candidate type,
2203 // then we should choose it, not some other alternative.
2204 // But we can't perform a no-op pointer address space change via bitcast,
2205 // so if we didn't have a common pointer element type, bail.
2206 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2207 return nullptr;
2208
2209 // Try to pick the "best" element type out of the choices.
2210 if (!HaveCommonEltTy && HaveVecPtrTy) {
2211 // If there was a pointer element type, there's really only one choice.
2212 CandidateTys.clear();
2213 CandidateTys.push_back(CommonVecPtrTy);
2214 } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2215 // Integer-ify vector types.
2216 for (VectorType *&VTy : CandidateTys) {
2217 if (!VTy->getElementType()->isIntegerTy())
2218 VTy = cast<VectorType>(VTy->getWithNewType(IntegerType::getIntNTy(
2219 VTy->getContext(), VTy->getScalarSizeInBits())));
2220 }
2221
2222 // Rank the remaining candidate vector types. This is easy because we know
2223 // they're all integer vectors. We sort by ascending number of elements.
2224 auto RankVectorTypesComp = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2225 (void)DL;
2226 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2227 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2228 "Cannot have vector types of different sizes!");
2229 assert(RHSTy->getElementType()->isIntegerTy() &&
2230 "All non-integer types eliminated!");
2231 assert(LHSTy->getElementType()->isIntegerTy() &&
2232 "All non-integer types eliminated!");
2233 return cast<FixedVectorType>(RHSTy)->getNumElements() <
2234 cast<FixedVectorType>(LHSTy)->getNumElements();
2235 };
2236 auto RankVectorTypesEq = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2237 (void)DL;
2238 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2239 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2240 "Cannot have vector types of different sizes!");
2241 assert(RHSTy->getElementType()->isIntegerTy() &&
2242 "All non-integer types eliminated!");
2243 assert(LHSTy->getElementType()->isIntegerTy() &&
2244 "All non-integer types eliminated!");
2245 return cast<FixedVectorType>(RHSTy)->getNumElements() ==
2246 cast<FixedVectorType>(LHSTy)->getNumElements();
2247 };
2248 llvm::sort(CandidateTys, RankVectorTypesComp);
2249 CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),
2250 CandidateTys.end());
2251 } else {
2252// The only way to have the same element type in every vector type is to
2253// have the same vector type. Check that and remove all but one.
2254#ifndef NDEBUG
2255 for (VectorType *VTy : CandidateTys) {
2256 assert(VTy->getElementType() == CommonEltTy &&
2257 "Unaccounted for element type!");
2258 assert(VTy == CandidateTys[0] &&
2259 "Different vector types with the same element type!");
2260 }
2261#endif
2262 CandidateTys.resize(1);
2263 }
2264
2265 // FIXME: hack. Do we have a named constant for this?
2266 // SDAG SDNode can't have more than 65535 operands.
2267 llvm::erase_if(CandidateTys, [](VectorType *VTy) {
2268 return cast<FixedVectorType>(VTy)->getNumElements() >
2269 std::numeric_limits<unsigned short>::max();
2270 });
2271
2272 for (VectorType *VTy : CandidateTys)
2273 if (checkVectorTypeForPromotion(P, VTy, DL, VScale))
2274 return VTy;
2275
2276 return nullptr;
2277}
2278
2279static VectorType *createAndCheckVectorTypesForPromotion(
2280 SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
2281 function_ref<void(Type *)> CheckCandidateType, Partition &P,
2282 const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
2283 bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
2284 bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) {
2285 [[maybe_unused]] VectorType *OriginalElt =
2286 CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;
2287 // Consider additional vector types where the element type size is a
2288 // multiple of load/store element size.
2289 for (Type *Ty : OtherTys) {
2290 if (!VectorType::isValidElementType(Ty))
2291 continue;
2292 unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
2293 // Make a copy of CandidateTys and iterate through it, because we
2294 // might append to CandidateTys in the loop.
2295 for (VectorType *const VTy : CandidateTysCopy) {
2296 // The elements in the copy should remain invariant throughout the loop
2297 assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
2298 unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
2299 unsigned ElementSize =
2300 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2301 if (TypeSize != VectorSize && TypeSize != ElementSize &&
2302 VectorSize % TypeSize == 0) {
2303 VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
2304 CheckCandidateType(NewVTy);
2305 }
2306 }
2307 }
2308
2309 return checkVectorTypesForPromotion(
2310 P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2311 HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
2312}
2313
2314/// Test whether the given alloca partitioning and range of slices can be
2315/// promoted to a vector.
2316///
2317/// This is a quick test to check whether we can rewrite a particular alloca
2318/// partition (and its newly formed alloca) into a vector alloca with only
2319/// whole-vector loads and stores such that it could be promoted to a vector
2320/// SSA value. We only can ensure this for a limited set of operations, and we
2321/// don't want to do the rewrites unless we are confident that the result will
2322/// be promotable, so we have an early test here.
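///
/// Schematically (illustrative IR), an alloca such as:
///   %a = alloca [8 x i16]
/// whose slices are one <8 x i16> store covering the whole partition and two
/// <4 x i16> loads at offsets 0 and 8 can be rebuilt as a <8 x i16> alloca,
/// with the narrow accesses rewritten via shufflevector extracts/inserts so
/// that mem2reg can promote the result.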
2323static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL,
2324 unsigned VScale) {
2325 // Collect the candidate types for vector-based promotion. Also track whether
2326 // we have different element types.
2327 SmallVector<VectorType *, 4> CandidateTys;
2328 SetVector<Type *> LoadStoreTys;
2329 SetVector<Type *> DeferredTys;
2330 Type *CommonEltTy = nullptr;
2331 VectorType *CommonVecPtrTy = nullptr;
2332 bool HaveVecPtrTy = false;
2333 bool HaveCommonEltTy = true;
2334 bool HaveCommonVecPtrTy = true;
2335 auto CheckCandidateType = [&](Type *Ty) {
2336 if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
2337 // Give up on all candidates if this type's total size in bits differs
2338 // from theirs.
2338 if (!CandidateTys.empty()) {
2339 VectorType *V = CandidateTys[0];
2340 if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
2341 DL.getTypeSizeInBits(V).getFixedValue()) {
2342 CandidateTys.clear();
2343 return;
2344 }
2345 }
2346 CandidateTys.push_back(VTy);
2347 Type *EltTy = VTy->getElementType();
2348
2349 if (!CommonEltTy)
2350 CommonEltTy = EltTy;
2351 else if (CommonEltTy != EltTy)
2352 HaveCommonEltTy = false;
2353
2354 if (EltTy->isPointerTy()) {
2355 HaveVecPtrTy = true;
2356 if (!CommonVecPtrTy)
2357 CommonVecPtrTy = VTy;
2358 else if (CommonVecPtrTy != VTy)
2359 HaveCommonVecPtrTy = false;
2360 }
2361 }
2362 };
2363
2364 // Put load and store types into a set for de-duplication.
2365 for (const Slice &S : P) {
2366 Type *Ty;
2367 if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
2368 Ty = LI->getType();
2369 else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
2370 Ty = SI->getValueOperand()->getType();
2371 else
2372 continue;
2373
2374 auto CandTy = Ty->getScalarType();
2375 if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
2376 S.endOffset() != P.endOffset())) {
2377 DeferredTys.insert(Ty);
2378 continue;
2379 }
2380
2381 LoadStoreTys.insert(Ty);
2382 // Consider any loads or stores that exactly cover the partition.
2383 if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
2384 CheckCandidateType(Ty);
2385 }
2386
2387 SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
2388 if (auto *VTy = createAndCheckVectorTypesForPromotion(
2389 LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
2390 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2391 HaveCommonVecPtrTy, CommonVecPtrTy, VScale))
2392 return VTy;
2393
2394 CandidateTys.clear();
2395 return createAndCheckVectorTypesForPromotion(
2396 DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
2397 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2398 CommonVecPtrTy, VScale);
2399}
2400
2401/// Test whether a slice of an alloca is valid for integer widening.
2402///
2403/// This implements the necessary checking for the \c isIntegerWideningViable
2404/// test below on a single slice of the alloca.
2405static bool isIntegerWideningViableForSlice(const Slice &S,
2406 uint64_t AllocBeginOffset,
2407 Type *AllocaTy,
2408 const DataLayout &DL,
2409 bool &WholeAllocaOp) {
2410 uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedValue();
2411
2412 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2413 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2414
2415 Use *U = S.getUse();
2416
2417 // Lifetime intrinsics operate over the whole alloca, whose size is usually
2418 // larger than other load/store slices (RelEnd > Size). But lifetime
2419 // intrinsics are always promotable and should not impact the promotability
2420 // of the partition's other slices.
2421 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2422 if (II->isLifetimeStartOrEnd() || II->isDroppable())
2423 return true;
2424 }
2425
2426 // We can't reasonably handle cases where the load or store extends past
2427 // the end of the alloca's type and into its padding.
2428 if (RelEnd > Size)
2429 return false;
2430
2431 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2432 if (LI->isVolatile())
2433 return false;
2434 // We can't handle loads that extend past the allocated memory.
2435 TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
2436 if (!LoadSize.isFixed() || LoadSize.getFixedValue() > Size)
2437 return false;
2438 // So far, AllocaSliceRewriter does not support widening split slice tails
2439 // in rewriteIntegerLoad.
2440 if (S.beginOffset() < AllocBeginOffset)
2441 return false;
2442 // Note that we don't count vector loads or stores as whole-alloca
2443 // operations which enable integer widening because we would prefer to use
2444 // vector widening instead.
2445 if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
2446 WholeAllocaOp = true;
2447 if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
2448 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2449 return false;
2450 } else if (RelBegin != 0 || RelEnd != Size ||
2451 !canConvertValue(DL, AllocaTy, LI->getType())) {
2452 // Non-integer loads need to be convertible from the alloca type so that
2453 // they are promotable.
2454 return false;
2455 }
2456 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2457 Type *ValueTy = SI->getValueOperand()->getType();
2458 if (SI->isVolatile())
2459 return false;
2460 // We can't handle stores that extend past the allocated memory.
2461 TypeSize StoreSize = DL.getTypeStoreSize(ValueTy);
2462 if (!StoreSize.isFixed() || StoreSize.getFixedValue() > Size)
2463 return false;
2464 // So far, AllocaSliceRewriter does not support widening split slice tails
2465 // in rewriteIntegerStore.
2466 if (S.beginOffset() < AllocBeginOffset)
2467 return false;
2468 // Note that we don't count vector loads or stores as whole-alloca
2469 // operations which enable integer widening because we would prefer to use
2470 // vector widening instead.
2471 if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
2472 WholeAllocaOp = true;
2473 if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
2474 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2475 return false;
2476 } else if (RelBegin != 0 || RelEnd != Size ||
2477 !canConvertValue(DL, ValueTy, AllocaTy)) {
2478 // Non-integer stores need to be convertible to the alloca type so that
2479 // they are promotable.
2480 return false;
2481 }
2482 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2483 if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
2484 return false;
2485 if (!S.isSplittable())
2486 return false; // Skip any unsplittable intrinsics.
2487 } else {
2488 return false;
2489 }
2490
2491 return true;
2492}
2493
2494/// Test whether the given alloca partition's integer operations can be
2495/// widened to promotable ones.
2496///
2497/// This is a quick test to check whether we can rewrite the integer loads and
2498/// stores to a particular alloca into wider loads and stores and be able to
2499/// promote the resulting alloca.
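///
/// For instance (illustrative): an alloca of type i64 written by two i32
/// stores at byte offsets 0 and 4 and read back by one covering i64 load can
/// be widened so that each i32 store becomes a load/mask/shift/or/store of
/// the whole i64, after which the alloca promotes cleanly.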
2500static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
2501 const DataLayout &DL) {
2502 uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2503 // Don't create integer types larger than the maximum bitwidth.
2504 if (SizeInBits > IntegerType::MAX_INT_BITS)
2505 return false;
2506
2507 // Don't try to handle allocas with bit-padding.
2508 if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2509 return false;
2510
2511 // We need to ensure that an integer type with the appropriate bitwidth can
2512 // be converted to the alloca type, whatever that is. We don't want to force
2513 // the alloca itself to have an integer type if there is a more suitable one.
2514 Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
2515 if (!canConvertValue(DL, AllocaTy, IntTy) ||
2516 !canConvertValue(DL, IntTy, AllocaTy))
2517 return false;
2518
2519 // While examining uses, we ensure that the alloca has a covering load or
2520 // store. We don't want to widen the integer operations only to fail to
2521 // promote due to some other unsplittable entry (which we may make splittable
2522 // later). However, if there are only splittable uses, go ahead and assume
2523 // that we cover the alloca.
2524 // FIXME: We shouldn't consider split slices that happen to start in the
2525 // partition here...
2526 bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);
2527
2528 for (const Slice &S : P)
2529 if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
2530 WholeAllocaOp))
2531 return false;
2532
2533 for (const Slice *S : P.splitSliceTails())
2534 if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL,
2535 WholeAllocaOp))
2536 return false;
2537
2538 return WholeAllocaOp;
2539}
2540
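// extractInteger below pulls a narrow integer out of a wider integer that
// models the alloca's bytes. As a worked example (little endian, value names
// invented): extracting an i16 at byte offset 2 from an i64 %v produces
//   %v.shift = lshr i64 %v, 16
//   %v.trunc = trunc i64 %v.shift to i16
// On a big-endian target the shift amount is 8 * (8 - 2 - 2) = 32 instead.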
2541static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2542 IntegerType *Ty, uint64_t Offset,
2543 const Twine &Name) {
2544 LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2545 IntegerType *IntTy = cast<IntegerType>(V->getType());
2546 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2547 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2548 "Element extends past full value");
2549 uint64_t ShAmt = 8 * Offset;
2550 if (DL.isBigEndian())
2551 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2552 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2553 if (ShAmt) {
2554 V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
2555 LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2556 }
2557 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2558 "Cannot extract to a larger integer!");
2559 if (Ty != IntTy) {
2560 V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
2561 LLVM_DEBUG(dbgs() << " trunced: " << *V << "\n");
2562 }
2563 return V;
2564}
2565
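// insertInteger below is the mirror operation: splicing a narrow integer into
// a wider one. As a worked example (little endian, value names invented):
// inserting an i16 %v at byte offset 2 into an i64 %old produces roughly
//   %v.ext = zext i16 %v to i64
//   %v.shift = shl i64 %v.ext, 16
//   %old.mask = and i64 %old, 0xFFFFFFFF0000FFFF
//   %v.insert = or i64 %old.mask, %v.shift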
2566static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
2567 Value *V, uint64_t Offset, const Twine &Name) {
2568 IntegerType *IntTy = cast<IntegerType>(Old->getType());
2569 IntegerType *Ty = cast<IntegerType>(V->getType());
2570 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2571 "Cannot insert a larger integer!");
2572 LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2573 if (Ty != IntTy) {
2574 V = IRB.CreateZExt(V, IntTy, Name + ".ext");
2575 LLVM_DEBUG(dbgs() << " extended: " << *V << "\n");
2576 }
2577 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2578 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2579 "Element store outside of alloca store");
2580 uint64_t ShAmt = 8 * Offset;
2581 if (DL.isBigEndian())
2582 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2583 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2584 if (ShAmt) {
2585 V = IRB.CreateShl(V, ShAmt, Name + ".shift");
2586 LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2587 }
2588
2589 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
2590 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
2591 Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
2592 LLVM_DEBUG(dbgs() << " masked: " << *Old << "\n");
2593 V = IRB.CreateOr(Old, V, Name + ".insert");
2594 LLVM_DEBUG(dbgs() << " inserted: " << *V << "\n");
2595 }
2596 return V;
2597}
2598
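// extractVector below grabs a contiguous run of elements. For example
// (illustrative): elements [1, 3) of a <4 x i32> %v become
//   %v.extract = shufflevector <4 x i32> %v, <4 x i32> poison,
//                              <2 x i32> <i32 1, i32 2>
// while extracting a single element uses extractelement instead.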
2599static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
2600 unsigned EndIndex, const Twine &Name) {
2601 auto *VecTy = cast<FixedVectorType>(V->getType());
2602 unsigned NumElements = EndIndex - BeginIndex;
2603 assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
2604
2605 if (NumElements == VecTy->getNumElements())
2606 return V;
2607
2608 if (NumElements == 1) {
2609 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2610 Name + ".extract");
2611 LLVM_DEBUG(dbgs() << " extract: " << *V << "\n");
2612 return V;
2613 }
2614
2615 auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
2616 V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
2617 LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2618 return V;
2619}
2620
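// insertVector below widens the incoming value with a shuffle and then blends
// it into the existing vector with a constant-mask select. For example
// (illustrative): inserting a <2 x i32> %v at element 1 of a <4 x i32> %old
// produces roughly
//   %v.expand = shufflevector <2 x i32> %v, <2 x i32> poison,
//                             <4 x i32> <i32 poison, i32 0, i32 1, i32 poison>
//   %v.blend = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>,
//                     <4 x i32> %v.expand, <4 x i32> %old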
2621static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
2622 unsigned BeginIndex, const Twine &Name) {
2623 VectorType *VecTy = cast<VectorType>(Old->getType());
2624 assert(VecTy && "Can only insert a vector into a vector");
2625
2626 VectorType *Ty = dyn_cast<VectorType>(V->getType());
2627 if (!Ty) {
2628 // Single element to insert.
2629 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2630 Name + ".insert");
2631 LLVM_DEBUG(dbgs() << " insert: " << *V << "\n");
2632 return V;
2633 }
2634
2635 assert(cast<FixedVectorType>(Ty)->getNumElements() <=
2636 cast<FixedVectorType>(VecTy)->getNumElements() &&
2637 "Too many elements!");
2638 if (cast<FixedVectorType>(Ty)->getNumElements() ==
2639 cast<FixedVectorType>(VecTy)->getNumElements()) {
2640 assert(V->getType() == VecTy && "Vector type mismatch");
2641 return V;
2642 }
2643 unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements();
2644
2645 // When inserting a smaller vector into the larger one to store, we first
2646 // use a shuffle vector to widen it with poison elements, and then
2647 // a select with a constant mask to blend the loaded vector with the
2648 // incoming vector.
2649 SmallVector<int, 8> Mask;
2650 Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
2651 for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2652 if (i >= BeginIndex && i < EndIndex)
2653 Mask.push_back(i - BeginIndex);
2654 else
2655 Mask.push_back(-1);
2656 V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
2657 LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2658
2659 SmallVector<Constant *, 8> Mask2;
2660 Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
2661 for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2662 Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
2663
2664 V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend");
2665
2666 LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
2667 return V;
2668}
2669
2670namespace {
2671
2672/// Visitor to rewrite instructions using a particular slice of an alloca
2673/// to use a new alloca.
2674///
2675/// Also implements the rewriting to vector-based accesses when the partition
2676/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
2677/// lives here.
2678class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
2679 // Befriend the base class so it can delegate to private visit methods.
2680 friend class InstVisitor<AllocaSliceRewriter, bool>;
2681
2682 using Base = InstVisitor<AllocaSliceRewriter, bool>;
2683
2684 const DataLayout &DL;
2685 AllocaSlices &AS;
2686 SROA &Pass;
2687 AllocaInst &OldAI, &NewAI;
2688 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2689 Type *NewAllocaTy;
2690
2691 // This is a convenience and flag variable that will be null unless the new
2692 // alloca's integer operations should be widened to this integer type due to
2693 // passing isIntegerWideningViable above. If it is non-null, the desired
2694 // integer type will be stored here for easy access during rewriting.
2695 IntegerType *IntTy;
2696
2697 // If we are rewriting an alloca partition which can be written as pure
2698 // vector operations, we stash extra information here. When VecTy is
2699 // non-null, we have some strict guarantees about the rewritten alloca:
2700 // - The new alloca is exactly the size of the vector type here.
2701 // - The accesses all either map to the entire vector or to a single
2702 // element.
2703 // - The set of accessing instructions is only one of those handled above
2704 // in isVectorPromotionViable. Generally these are the same access kinds
2705 // which are promotable via mem2reg.
2706 VectorType *VecTy;
2707 Type *ElementTy;
2708 uint64_t ElementSize;
2709
2710 // The original offset of the slice currently being rewritten relative to
2711 // the original alloca.
2712 uint64_t BeginOffset = 0;
2713 uint64_t EndOffset = 0;
2714
2715 // The new offsets of the slice currently being rewritten relative to the
2716 // original alloca.
2717 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2718
2719 uint64_t SliceSize = 0;
2720 bool IsSplittable = false;
2721 bool IsSplit = false;
2722 Use *OldUse = nullptr;
2723 Instruction *OldPtr = nullptr;
2724
2725 // Track post-rewrite users which are PHI nodes and Selects.
2726 SmallSetVector<PHINode *, 8> &PHIUsers;
2727 SmallSetVector<SelectInst *, 8> &SelectUsers;
2728
2729 // Utility IR builder, whose name prefix is set up for each visited use, and
2730 // the insertion point is set to point to the user.
2731 IRBuilderTy IRB;
2732
2733 // Return the new alloca, addrspacecasted if required to avoid changing the
2734 // addrspace of a volatile access.
2735 Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
2736 if (!IsVolatile || AddrSpace == NewAI.getType()->getPointerAddressSpace())
2737 return &NewAI;
2738
2739 Type *AccessTy = IRB.getPtrTy(AddrSpace);
2740 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2741 }
2742
2743public:
2744 AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
2745 AllocaInst &OldAI, AllocaInst &NewAI,
2746 uint64_t NewAllocaBeginOffset,
2747 uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
2748 VectorType *PromotableVecTy,
2749 SmallSetVector<PHINode *, 8> &PHIUsers,
2750 SmallSetVector<SelectInst *, 8> &SelectUsers)
2751 : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
2752 NewAllocaBeginOffset(NewAllocaBeginOffset),
2753 NewAllocaEndOffset(NewAllocaEndOffset),
2754 NewAllocaTy(NewAI.getAllocatedType()),
2755 IntTy(
2756 IsIntegerPromotable
2757 ? Type::getIntNTy(NewAI.getContext(),
2758 DL.getTypeSizeInBits(NewAI.getAllocatedType())
2759 .getFixedValue())
2760 : nullptr),
2761 VecTy(PromotableVecTy),
2762 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2763 ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2764 : 0),
2765 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2766 IRB(NewAI.getContext(), ConstantFolder()) {
2767 if (VecTy) {
2768 assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2769 "Only multiple-of-8 sized vector elements are viable");
2770 ++NumVectorized;
2771 }
2772 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2773 }
2774
2775 bool visit(AllocaSlices::const_iterator I) {
2776 bool CanSROA = true;
2777 BeginOffset = I->beginOffset();
2778 EndOffset = I->endOffset();
2779 IsSplittable = I->isSplittable();
2780 IsSplit =
2781 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2782 LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));
2783 LLVM_DEBUG(AS.printSlice(dbgs(), I, ""));
2784 LLVM_DEBUG(dbgs() << "\n");
2785
2786 // Compute the intersecting offset range.
2787 assert(BeginOffset < NewAllocaEndOffset);
2788 assert(EndOffset > NewAllocaBeginOffset);
2789 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2790 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2791
2792 SliceSize = NewEndOffset - NewBeginOffset;
2793 LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset
2794 << ") NewBegin:(" << NewBeginOffset << ", "
2795 << NewEndOffset << ") NewAllocaBegin:("
2796 << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
2797 << ")\n");
2798 assert(IsSplit || NewBeginOffset == BeginOffset);
2799 OldUse = I->getUse();
2800 OldPtr = cast<Instruction>(OldUse->get());
2801
2802 Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
2803 IRB.SetInsertPoint(OldUserI);
2804 IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
2805 IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
2806 Twine(BeginOffset) + ".");
2807
2808 CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
2809 if (VecTy || IntTy)
2810 assert(CanSROA);
2811 return CanSROA;
2812 }
2813
2814private:
2815 // Make sure the other visit overloads are visible.
2816 using Base::visit;
2817
2818 // Every instruction which can end up as a user must have a rewrite rule.
2819 bool visitInstruction(Instruction &I) {
2820 LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
2821 llvm_unreachable("No rewrite rule for this instruction!");
2822 }
2823
2824 Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
2825 // Note that the offset computation can use BeginOffset or NewBeginOffset
2826 // interchangeably for unsplit slices.
2827 assert(IsSplit || BeginOffset == NewBeginOffset);
2828 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2829
2830#ifndef NDEBUG
2831 StringRef OldName = OldPtr->getName();
2832 // Skip through the last '.sroa.' component of the name.
2833 size_t LastSROAPrefix = OldName.rfind(".sroa.");
2834 if (LastSROAPrefix != StringRef::npos) {
2835 OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
2836 // Look for an SROA slice index.
2837 size_t IndexEnd = OldName.find_first_not_of("0123456789");
2838 if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
2839 // Strip the index and look for the offset.
2840 OldName = OldName.substr(IndexEnd + 1);
2841 size_t OffsetEnd = OldName.find_first_not_of("0123456789");
2842 if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
2843 // Strip the offset.
2844 OldName = OldName.substr(OffsetEnd + 1);
2845 }
2846 }
2847 // Strip any SROA suffixes as well.
2848 OldName = OldName.substr(0, OldName.find(".sroa_"));
2849#endif
2850
2851 return getAdjustedPtr(IRB, DL, &NewAI,
2852 APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset),
2853 PointerTy,
2854#ifndef NDEBUG
2855 Twine(OldName) + "."
2856#else
2857 Twine()
2858#endif
2859 );
2860 }
2861
2862 /// Compute suitable alignment to access this slice of the *new*
2863 /// alloca.
2864 ///
2865 /// This is the new alloca's alignment reduced as necessary to remain valid
2866 /// at the slice's byte offset within the new alloca.
2867 Align getSliceAlign() {
2868 return commonAlignment(NewAI.getAlign(),
2869 NewBeginOffset - NewAllocaBeginOffset);
2870 }
2871
2872 unsigned getIndex(uint64_t Offset) {
2873 assert(VecTy && "Can only call getIndex when rewriting a vector");
2874 uint64_t RelOffset = Offset - NewAllocaBeginOffset;
2875 assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
2876 uint32_t Index = RelOffset / ElementSize;
2877 assert(Index * ElementSize == RelOffset);
2878 return Index;
2879 }
2880
2881 void deleteIfTriviallyDead(Value *V) {
2882 Instruction *I = cast<Instruction>(V);
2883 if (isInstructionTriviallyDead(I))
2884 Pass.DeadInsts.push_back(I);
2885 }
2886
2887 Value *rewriteVectorizedLoadInst(LoadInst &LI) {
2888 unsigned BeginIndex = getIndex(NewBeginOffset);
2889 unsigned EndIndex = getIndex(NewEndOffset);
2890 assert(EndIndex > BeginIndex && "Empty vector!");
2891
2892 LoadInst *Load = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
2893 NewAI.getAlign(), "load");
2894
2895 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
2896 LLVMContext::MD_access_group});
2897 return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");
2898 }
2899
2900 Value *rewriteIntegerLoad(LoadInst &LI) {
2901 assert(IntTy && "We cannot insert an integer to the alloca");
2902 assert(!LI.isVolatile());
2903 Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
2904 NewAI.getAlign(), "load");
2905 V = convertValue(DL, IRB, V, IntTy);
2906 assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
2907 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2908 if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
2909 IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
2910 V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract");
2911 }
2912 // It is possible that the extracted type is not the load type. This
2913 // happens if there is a load past the end of the alloca, and as
2914 // a consequence the slice is narrower but still a candidate for integer
2915 // lowering. To handle this case, we just zero extend the extracted
2916 // integer.
2917 assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
2918 "Can only handle an extract for an overly wide load");
2919 if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
2920 V = IRB.CreateZExt(V, LI.getType());
2921 return V;
2922 }
2923
2924 bool visitLoadInst(LoadInst &LI) {
2925 LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
2926 Value *OldOp = LI.getOperand(0);
2927 assert(OldOp == OldPtr);
2928
2929 AAMDNodes AATags = LI.getAAMetadata();
2930
2931 unsigned AS = LI.getPointerAddressSpace();
2932
2933 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
2934 : LI.getType();
2935 bool IsPtrAdjusted = false;
2936 Value *V;
2937 if (VecTy) {
2938 V = rewriteVectorizedLoadInst(LI);
2939 } else if (IntTy && LI.getType()->isIntegerTy()) {
2940 V = rewriteIntegerLoad(LI);
2941 } else if (NewBeginOffset == NewAllocaBeginOffset &&
2942 NewEndOffset == NewAllocaEndOffset &&
2943 (canConvertValue(DL, NewAllocaTy, TargetTy) ||
2944 (NewAllocaTy->isIntegerTy() && TargetTy->isIntegerTy() &&
2945 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
2946 !LI.isVolatile()))) {
2947 Value *NewPtr =
2948 getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
2949 LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
2950 NewAI.getAlign(), LI.isVolatile(),
2951 LI.getName());
2952 if (LI.isVolatile())
2953 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
2954 if (NewLI->isAtomic())
2955 NewLI->setAlignment(LI.getAlign());
2956
2957 // Copy any metadata that is valid for the new load. This may require
2958 // conversion to a different kind of metadata, e.g. !nonnull might change
2959 // to !range or vice versa.
2960 copyMetadataForLoad(*NewLI, LI);
2961
2962 // Do this after copyMetadataForLoad() to preserve the TBAA shift.
2963 if (AATags)
2964 NewLI->setAAMetadata(AATags.adjustForAccess(
2965 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
2966
2967 // Try to preserve nonnull metadata
2968 V = NewLI;
2969
2970 // If this is an integer load past the end of the slice (which means the
2971 // bytes outside the slice are undef or this load is dead) just forcibly
2972 // fix the integer size with correct handling of endianness.
2973 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2974 if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
2975 if (AITy->getBitWidth() < TITy->getBitWidth()) {
2976 V = IRB.CreateZExt(V, TITy, "load.ext");
2977 if (DL.isBigEndian())
2978 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
2979 "endian_shift");
2980 }
2981 } else {
2982 Type *LTy = IRB.getPtrTy(AS);
2983 LoadInst *NewLI =
2984 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
2985 getSliceAlign(), LI.isVolatile(), LI.getName());
2986
2987 if (AATags)
2988 NewLI->setAAMetadata(AATags.adjustForAccess(
2989 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
2990
2991 if (LI.isVolatile())
2992 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
2993 NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
2994 LLVMContext::MD_access_group});
2995
2996 V = NewLI;
2997 IsPtrAdjusted = true;
2998 }
2999 V = convertValue(DL, IRB, V, TargetTy);
3000
3001 if (IsSplit) {
3002 assert(!LI.isVolatile());
3003 assert(LI.getType()->isIntegerTy() &&
3004 "Only integer type loads and stores are split");
3005 assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&
3006 "Split load isn't smaller than original load");
3007 assert(DL.typeSizeEqualsStoreSize(LI.getType()) &&
3008 "Non-byte-multiple bit width");
3009 // Move the insertion point just past the load so that we can refer to it.
3010 BasicBlock::iterator LIIt = std::next(LI.getIterator());
3011 // Ensure the insertion point comes before any debug-info immediately
3012 // after the load, so that variable values referring to the load are
3013 // dominated by it.
3014 LIIt.setHeadBit(true);
3015 IRB.SetInsertPoint(LI.getParent(), LIIt);
3016 // Create a placeholder value with the same type as LI to use as the
3017 // basis for the new value. This allows us to replace the uses of LI with
3018 // the computed value, and then replace the placeholder with LI, leaving
3019 // LI only used for this computation.
3020 Value *Placeholder =
3021 new LoadInst(LI.getType(), PoisonValue::get(IRB.getPtrTy(AS)), "",
3022 false, Align(1));
3023 V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
3024 "insert");
3025 LI.replaceAllUsesWith(V);
3026 Placeholder->replaceAllUsesWith(&LI);
3027 Placeholder->deleteValue();
3028 } else {
3029 LI.replaceAllUsesWith(V);
3030 }
3031
3032 Pass.DeadInsts.push_back(&LI);
3033 deleteIfTriviallyDead(OldOp);
3034 LLVM_DEBUG(dbgs() << " to: " << *V << "\n");
3035 return !LI.isVolatile() && !IsPtrAdjusted;
3036 }
3037
3038 bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
3039 AAMDNodes AATags) {
3040 // Capture V for the purpose of debug-info accounting once it's converted
3041 // to a vector store.
3042 Value *OrigV = V;
3043 if (V->getType() != VecTy) {
3044 unsigned BeginIndex = getIndex(NewBeginOffset);
3045 unsigned EndIndex = getIndex(NewEndOffset);
3046 assert(EndIndex > BeginIndex && "Empty vector!");
3047 unsigned NumElements = EndIndex - BeginIndex;
3048 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3049 "Too many elements!");
3050 Type *SliceTy = (NumElements == 1)
3051 ? ElementTy
3052 : FixedVectorType::get(ElementTy, NumElements);
3053 if (V->getType() != SliceTy)
3054 V = convertValue(DL, IRB, V, SliceTy);
3055
3056 // Mix in the existing elements.
3057 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3058 NewAI.getAlign(), "load");
3059 V = insertVector(IRB, Old, V, BeginIndex, "vec");
3060 }
3061 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3062 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3063 LLVMContext::MD_access_group});
3064 if (AATags)
3065 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3066 V->getType(), DL));
3067 Pass.DeadInsts.push_back(&SI);
3068
3069 // NOTE: Careful to use OrigV rather than V.
3070 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3071 Store, Store->getPointerOperand(), OrigV, DL);
3072 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3073 return true;
3074 }
3075
3076 bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
3077 assert(IntTy && "We cannot extract an integer from the alloca");
3078 assert(!SI.isVolatile());
3079 if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=
3080 IntTy->getBitWidth()) {
3081 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3082 NewAI.getAlign(), "oldload");
3083 Old = convertValue(DL, IRB, Old, IntTy);
3084 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
3085 uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
3086 V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset, "insert");
3087 }
3088 V = convertValue(DL, IRB, V, NewAllocaTy);
3089 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3090 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3091 LLVMContext::MD_access_group});
3092 if (AATags)
3093 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3094 V->getType(), DL));
3095
3096 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3097 Store, Store->getPointerOperand(),
3098 Store->getValueOperand(), DL);
3099
3100 Pass.DeadInsts.push_back(&SI);
3101 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3102 return true;
3103 }
3104
3105 bool visitStoreInst(StoreInst &SI) {
3106 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3107 Value *OldOp = SI.getOperand(1);
3108 assert(OldOp == OldPtr);
3109
3110 AAMDNodes AATags = SI.getAAMetadata();
3111 Value *V = SI.getValueOperand();
3112
3113 // Strip all inbounds GEPs and pointer casts to try to dig out any root
3114 // alloca that should be re-examined after promoting this alloca.
3115 if (V->getType()->isPointerTy())
3116 if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
3117 Pass.PostPromotionWorklist.insert(AI);
3118
3119 TypeSize StoreSize = DL.getTypeStoreSize(V->getType());
3120 if (StoreSize.isFixed() && SliceSize < StoreSize.getFixedValue()) {
3121 assert(!SI.isVolatile());
3122 assert(V->getType()->isIntegerTy() &&
3123 "Only integer type loads and stores are split");
3124 assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
3125 "Non-byte-multiple bit width");
3126 IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
3127 V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
3128 "extract");
3129 }
3130
3131 if (VecTy)
3132 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
3133 if (IntTy && V->getType()->isIntegerTy())
3134 return rewriteIntegerStore(V, SI, AATags);
3135
3136 StoreInst *NewSI;
3137 if (NewBeginOffset == NewAllocaBeginOffset &&
3138 NewEndOffset == NewAllocaEndOffset &&
3139 canConvertValue(DL, V->getType(), NewAllocaTy)) {
3140 V = convertValue(DL, IRB, V, NewAllocaTy);
3141 Value *NewPtr =
3142 getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
3143
3144 NewSI =
3145 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
3146 } else {
3147 unsigned AS = SI.getPointerAddressSpace();
3148 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
3149 NewSI =
3150 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
3151 }
3152 NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3153 LLVMContext::MD_access_group});
3154 if (AATags)
3155 NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3156 V->getType(), DL));
3157 if (SI.isVolatile())
3158 NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
3159 if (NewSI->isAtomic())
3160 NewSI->setAlignment(SI.getAlign());
3161
3162 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3163 NewSI, NewSI->getPointerOperand(),
3164 NewSI->getValueOperand(), DL);
3165
3166 Pass.DeadInsts.push_back(&SI);
3167 deleteIfTriviallyDead(OldOp);
3168
3169 LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n");
3170 return NewSI->getPointerOperand() == &NewAI &&
3171 NewSI->getValueOperand()->getType() == NewAllocaTy &&
3172 !SI.isVolatile();
3173 }
3174
3175 /// Compute an integer value from splatting an i8 across the given
3176 /// number of bytes.
3177 ///
3178 /// Note that this routine assumes an i8 is a byte. If that isn't true, don't
3179 /// call this routine.
3180 /// FIXME: Heed the advice above.
3181 ///
3182 /// \param V The i8 value to splat.
3183 /// \param Size The number of bytes in the output (assuming i8 is one byte)
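///
/// For example, splatting the byte 0xAB across Size = 4 forms the constant
/// 0x01010101 as (2^32 - 1) udiv (2^8 - 1) and multiplies:
/// 0x000000AB * 0x01010101 == 0xABABABAB.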
3184 Value *getIntegerSplat(Value *V, unsigned Size) {
3185 assert(Size > 0 && "Expected a positive number of bytes.");
3186 IntegerType *VTy = cast<IntegerType>(V->getType());
3187 assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
3188 if (Size == 1)
3189 return V;
3190
3191 Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8);
3192 V = IRB.CreateMul(
3193 IRB.CreateZExt(V, SplatIntTy, "zext"),
3194 IRB.CreateUDiv(Constant::getAllOnesValue(SplatIntTy),
3195 IRB.CreateZExt(Constant::getAllOnesValue(V->getType()),
3196 SplatIntTy)),
3197 "isplat");
3198 return V;
3199 }
3200
3201 /// Compute a vector splat for a given element value.
3202 Value *getVectorSplat(Value *V, unsigned NumElements) {
3203 V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
3204 LLVM_DEBUG(dbgs() << " splat: " << *V << "\n");
3205 return V;
3206 }
3207
3208 bool visitMemSetInst(MemSetInst &II) {
3209 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3210 assert(II.getRawDest() == OldPtr);
3211
3212 AAMDNodes AATags = II.getAAMetadata();
3213
3214 // If the memset has a variable size, it cannot be split, just adjust the
3215 // pointer to the new alloca.
3216 if (!isa<ConstantInt>(II.getLength())) {
3217 assert(!IsSplit);
3218 assert(NewBeginOffset == BeginOffset);
3219 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
3220 II.setDestAlignment(getSliceAlign());
3221 // In theory we should call migrateDebugInfo here. However, we do not
3222 // emit dbg.assign intrinsics for mem intrinsics storing through non-
3223 // constant geps, or storing a variable number of bytes.
3224 assert(at::getDVRAssignmentMarkers(&II).empty() &&
3225 "AT: Unexpected link to non-const GEP");
3226 deleteIfTriviallyDead(OldPtr);
3227 return false;
3228 }
3229
3230 // Record this instruction for deletion.
3231 Pass.DeadInsts.push_back(&II);
3232
3233 Type *AllocaTy = NewAI.getAllocatedType();
3234 Type *ScalarTy = AllocaTy->getScalarType();
3235
3236 const bool CanContinue = [&]() {
3237 if (VecTy || IntTy)
3238 return true;
3239 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
3240 return false;
3241 // Length must be in range for FixedVectorType.
3242 auto *C = cast<ConstantInt>(II.getLength());
3243 const uint64_t Len = C->getLimitedValue();
3244 if (Len > std::numeric_limits<unsigned>::max())
3245 return false;
3246 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
3247 auto *SrcTy = FixedVectorType::get(Int8Ty, Len);
3248 return canConvertValue(DL, SrcTy, AllocaTy) &&
3249 DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3250 }();
3251
3252 // If this doesn't map cleanly onto the alloca type, and that type isn't
3253 // a single value type, just emit a memset.
3254 if (!CanContinue) {
3255 Type *SizeTy = II.getLength()->getType();
3256 unsigned Sz = NewEndOffset - NewBeginOffset;
3257 Constant *Size = ConstantInt::get(SizeTy, Sz);
3258 MemIntrinsic *New = cast<MemIntrinsic>(IRB.CreateMemSet(
3259 getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
3260 MaybeAlign(getSliceAlign()), II.isVolatile()));
3261 if (AATags)
3262 New->setAAMetadata(
3263 AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));
3264
3265 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3266 New, New->getRawDest(), nullptr, DL);
3267
3268 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3269 return false;
3270 }
3271
3272 // If we can represent this as a simple value, we have to build the actual
3273 // value to store, which requires expanding the byte present in memset to
3274 // a sensible representation for the alloca type. This is essentially
3275 // splatting the byte to a sufficiently wide integer, splatting it across
3276 // any desired vector width, and bitcasting to the final type.
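// For example, a memset of the byte 0x2A over a <2 x float> slice builds the
// i32 splat 0x2A2A2A2A, converts it to a float element, vector-splats it to
// <2 x float>, and merges it into the existing vector value before storing.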
3277 Value *V;
3278
3279 if (VecTy) {
3280 // If this is a memset of a vectorized alloca, insert it.
3281 assert(ElementTy == ScalarTy);
3282
3283 unsigned BeginIndex = getIndex(NewBeginOffset);
3284 unsigned EndIndex = getIndex(NewEndOffset);
3285 assert(EndIndex > BeginIndex && "Empty vector!");
3286 unsigned NumElements = EndIndex - BeginIndex;
3287 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3288 "Too many elements!");
3289
3290 Value *Splat = getIntegerSplat(
3291 II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3292 Splat = convertValue(DL, IRB, Splat, ElementTy);
3293 if (NumElements > 1)
3294 Splat = getVectorSplat(Splat, NumElements);
3295
3296 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3297 NewAI.getAlign(), "oldload");
3298 V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
3299 } else if (IntTy) {
3300 // If this is a memset on an alloca where we can widen stores, insert the
3301 // set integer.
3302 assert(!II.isVolatile());
3303
3304 uint64_t Size = NewEndOffset - NewBeginOffset;
3305 V = getIntegerSplat(II.getValue(), Size);
3306
3307 if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
3308 EndOffset != NewAllocaEndOffset)) {
3309 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3310 NewAI.getAlign(), "oldload");
3311 Old = convertValue(DL, IRB, Old, IntTy);
3312 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3313 V = insertInteger(DL, IRB, Old, V, Offset, "insert");
3314 } else {
3315 assert(V->getType() == IntTy &&
3316 "Wrong type for an alloca wide integer!");
3317 }
3318 V = convertValue(DL, IRB, V, AllocaTy);
3319 } else {
3320 // Established these invariants above.
3321 assert(NewBeginOffset == NewAllocaBeginOffset);
3322 assert(NewEndOffset == NewAllocaEndOffset);
3323
3324 V = getIntegerSplat(II.getValue(),
3325 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3326 if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
3327 V = getVectorSplat(
3328 V, cast<FixedVectorType>(AllocaVecTy)->getNumElements());
3329
3330 V = convertValue(DL, IRB, V, AllocaTy);
3331 }
3332
3333 Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3334 StoreInst *New =
3335 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
3336 New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3337 LLVMContext::MD_access_group});
3338 if (AATags)
3339 New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3340 V->getType(), DL));
3341
3342 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3343 New, New->getPointerOperand(), V, DL);
3344
3345 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3346 return !II.isVolatile();
3347 }
3348
3349 bool visitMemTransferInst(MemTransferInst &II) {
3350 // Rewriting of memory transfer instructions can be a bit tricky. We break
3351 // them into two categories: split intrinsics and unsplit intrinsics.
3352
3353 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3354
3355 AAMDNodes AATags = II.getAAMetadata();
3356
3357 bool IsDest = &II.getRawDestUse() == OldUse;
3358 assert((IsDest && II.getRawDest() == OldPtr) ||
3359 (!IsDest && II.getRawSource() == OldPtr));
3360
3361 Align SliceAlign = getSliceAlign();
3362 // For unsplit intrinsics, we simply modify the source and destination
3363 // pointers in place. This isn't just an optimization, it is a matter of
3364 // correctness. With unsplit intrinsics we may be dealing with transfers
3365 // within a single alloca before SROA ran, or with transfers that have
3366 // a variable length. We may also be dealing with memmove instead of
3367 // memcpy, and so simply updating the pointers is all that is necessary for
3368 // us to update both the source and dest of a single call.
3369 if (!IsSplittable) {
3370 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3371 if (IsDest) {
3372 // Update the address component of linked dbg.assigns.
3373 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(&II)) {
3374 if (llvm::is_contained(DbgAssign->location_ops(), II.getDest()) ||
3375 DbgAssign->getAddress() == II.getDest())
3376 DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);
3377 }
3378 II.setDest(AdjustedPtr);
3379 II.setDestAlignment(SliceAlign);
3380 } else {
3381 II.setSource(AdjustedPtr);
3382 II.setSourceAlignment(SliceAlign);
3383 }
3384
3385 LLVM_DEBUG(dbgs() << " to: " << II << "\n");
3386 deleteIfTriviallyDead(OldPtr);
3387 return false;
3388 }
3389 // For split transfer intrinsics we have an incredibly useful assurance:
3390 // the source and destination do not reside within the same alloca, and at
3391 // least one of them does not escape. This means that we can replace
3392 // memmove with memcpy, and we don't need to worry about all manner of
3393 // downsides to splitting and transforming the operations.
3394
3395 // If this doesn't map cleanly onto the alloca type, and that type isn't
3396 // a single value type, just emit a memcpy.
3397 bool EmitMemCpy =
3398 !VecTy && !IntTy &&
3399 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3400 SliceSize !=
3401 DL.getTypeStoreSize(NewAI.getAllocatedType()).getFixedValue() ||
3402 !DL.typeSizeEqualsStoreSize(NewAI.getAllocatedType()) ||
3403 !NewAI.getAllocatedType()->isSingleValueType());
3404
3405 // If we're just going to emit a memcpy, the alloca hasn't changed, and the
3406 // size hasn't been shrunk based on analysis of the viable range, this is
3407 // a no-op.
3408 if (EmitMemCpy && &OldAI == &NewAI) {
3409 // Ensure the start lines up.
3410 assert(NewBeginOffset == BeginOffset);
3411
3412 // Rewrite the size as needed.
3413 if (NewEndOffset != EndOffset)
3414 II.setLength(NewEndOffset - NewBeginOffset);
3415 return false;
3416 }
3417 // Record this instruction for deletion.
3418 Pass.DeadInsts.push_back(&II);
3419
3420 // Strip all inbounds GEPs and pointer casts to try to dig out any root
3421 // alloca that should be re-examined after rewriting this instruction.
3422 Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
3423 if (AllocaInst *AI =
3424 dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
3425 assert(AI != &OldAI && AI != &NewAI &&
3426 "Splittable transfers cannot reach the same alloca on both ends.");
3427 Pass.Worklist.insert(AI);
3428 }
3429
3430 Type *OtherPtrTy = OtherPtr->getType();
3431 unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
3432
3433 // Compute the relative offset for the other pointer within the transfer.
3434 unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
3435 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3436 Align OtherAlign =
3437 (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
3438 OtherAlign =
3439 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
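// For example, if the other pointer was 8-byte aligned and this slice begins
// 4 bytes into the original transfer, the access through the adjusted pointer
// can only be assumed to be 4-byte aligned.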
3440
3441 if (EmitMemCpy) {
3442 // Compute the other pointer, folding as much as possible to produce
3443 // a single, simple GEP in most cases.
3444 OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3445 OtherPtr->getName() + ".");
3446
3447 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3448 Type *SizeTy = II.getLength()->getType();
3449 Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3450
3451 Value *DestPtr, *SrcPtr;
3452 MaybeAlign DestAlign, SrcAlign;
3453 // Note: IsDest is true iff we're copying into the new alloca slice
3454 if (IsDest) {
3455 DestPtr = OurPtr;
3456 DestAlign = SliceAlign;
3457 SrcPtr = OtherPtr;
3458 SrcAlign = OtherAlign;
3459 } else {
3460 DestPtr = OtherPtr;
3461 DestAlign = OtherAlign;
3462 SrcPtr = OurPtr;
3463 SrcAlign = SliceAlign;
3464 }
3465 CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3466 Size, II.isVolatile());
3467 if (AATags)
3468 New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3469
3470 APInt Offset(DL.getIndexTypeSizeInBits(DestPtr->getType()), 0);
3471 if (IsDest) {
3472 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8,
3473 &II, New, DestPtr, nullptr, DL);
3474 } else if (AllocaInst *Base = dyn_cast<AllocaInst>(
3475 DestPtr->stripAndAccumulateConstantOffsets(
3476 DL, Offset, /*AllowNonInbounds*/ true))) {
3477 migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8,
3478 SliceSize * 8, &II, New, DestPtr, nullptr, DL);
3479 }
3480 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3481 return false;
3482 }
3483
3484 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3485 NewEndOffset == NewAllocaEndOffset;
3486 uint64_t Size = NewEndOffset - NewBeginOffset;
3487 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3488 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3489 unsigned NumElements = EndIndex - BeginIndex;
3490 IntegerType *SubIntTy =
3491 IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;
3492
3493 // Reset the other pointer type to match the register type we're going to
3494 // use, but using the address space of the original other pointer.
3495 Type *OtherTy;
3496 if (VecTy && !IsWholeAlloca) {
3497 if (NumElements == 1)
3498 OtherTy = VecTy->getElementType();
3499 else
3500 OtherTy = FixedVectorType::get(VecTy->getElementType(), NumElements);
3501 } else if (IntTy && !IsWholeAlloca) {
3502 OtherTy = SubIntTy;
3503 } else {
3504 OtherTy = NewAllocaTy;
3505 }
3506
3507 Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3508 OtherPtr->getName() + ".");
3509 MaybeAlign SrcAlign = OtherAlign;
3510 MaybeAlign DstAlign = SliceAlign;
3511 if (!IsDest)
3512 std::swap(SrcAlign, DstAlign);
3513
3514 Value *SrcPtr;
3515 Value *DstPtr;
3516
3517 if (IsDest) {
3518 DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3519 SrcPtr = AdjPtr;
3520 } else {
3521 DstPtr = AdjPtr;
3522 SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());
3523 }
3524
3525 Value *Src;
3526 if (VecTy && !IsWholeAlloca && !IsDest) {
3527 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3528 NewAI.getAlign(), "load");
3529 Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
3530 } else if (IntTy && !IsWholeAlloca && !IsDest) {
3531 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3532 NewAI.getAlign(), "load");
3533 Src = convertValue(DL, IRB, Src, IntTy);
3534 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3535 Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
3536 } else {
3537 LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3538 II.isVolatile(), "copyload");
3539 Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3540 LLVMContext::MD_access_group});
3541 if (AATags)
3542 Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3543 Load->getType(), DL));
3544 Src = Load;
3545 }
3546
3547 if (VecTy && !IsWholeAlloca && IsDest) {
3548 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3549 NewAI.getAlign(), "oldload");
3550 Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
3551 } else if (IntTy && !IsWholeAlloca && IsDest) {
3552 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3553 NewAI.getAlign(), "oldload");
3554 Old = convertValue(DL, IRB, Old, IntTy);
3555 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3556 Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
3557 Src = convertValue(DL, IRB, Src, NewAllocaTy);
3558 }
3559
3560 StoreInst *Store = cast<StoreInst>(
3561 IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
3562 Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3563 LLVMContext::MD_access_group});
3564 if (AATags)
3565 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3566 Src->getType(), DL));
3567
3568 APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);
3569 if (IsDest) {
3570
3571 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3572 Store, DstPtr, Src, DL);
3573 } else if (AllocaInst *Base = dyn_cast<AllocaInst>(
3574 DstPtr->stripAndAccumulateConstantOffsets(
3575 DL, Offset, /*AllowNonInbounds*/ true))) {
3576 migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8, SliceSize * 8,
3577 &II, Store, DstPtr, Src, DL);
3578 }
3579
3580 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3581 return !II.isVolatile();
3582 }
3583
3584 bool visitIntrinsicInst(IntrinsicInst &II) {
3585 assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
3586 "Unexpected intrinsic!");
3587 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3588
3589 // Record this instruction for deletion.
3590 Pass.DeadInsts.push_back(&II);
3591
3592 if (II.isDroppable()) {
3593 assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");
3594 // TODO For now we forget assumed information, this can be improved.
3595 OldPtr->dropDroppableUsesIn(II);
3596 return true;
3597 }
3598
3599 assert(II.getArgOperand(0) == OldPtr);
3600 Type *PointerTy = IRB.getPtrTy(OldPtr->getType()->getPointerAddressSpace());
3601 Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
3602 Value *New;
3603 if (II.getIntrinsicID() == Intrinsic::lifetime_start)
3604 New = IRB.CreateLifetimeStart(Ptr);
3605 else
3606 New = IRB.CreateLifetimeEnd(Ptr);
3607
3608 (void)New;
3609 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3610
3611 return true;
3612 }
3613
3614 void fixLoadStoreAlign(Instruction &Root) {
3615 // This algorithm implements the same visitor loop as
3616 // hasUnsafePHIOrSelectUse, and fixes the alignment of each load
3617 // or store found.
3618 SmallPtrSet<Instruction *, 4> Visited;
3619 SmallVector<Instruction *, 4> Uses;
3620 Visited.insert(&Root);
3621 Uses.push_back(&Root);
3622 do {
3623 Instruction *I = Uses.pop_back_val();
3624
3625 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
3626 LI->setAlignment(std::min(LI->getAlign(), getSliceAlign()));
3627 continue;
3628 }
3629 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
3630 SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
3631 continue;
3632 }
3633
3634 assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
3635 isa<PHINode>(I) || isa<SelectInst>(I) ||
3636 isa<GetElementPtrInst>(I));
3637 for (User *U : I->users())
3638 if (Visited.insert(cast<Instruction>(U)).second)
3639 Uses.push_back(cast<Instruction>(U));
3640 } while (!Uses.empty());
3641 }
3642
3643 bool visitPHINode(PHINode &PN) {
3644 LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
3645 assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
3646 assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
3647
3648 // We would like to compute a new pointer in only one place, but have it be
3649 // as local as possible to the PHI. To do that, we re-use the location of
3650 // the old pointer, which necessarily must be in the right position to
3651 // dominate the PHI.
3652 IRBuilderBase::InsertPointGuard Guard(IRB);
3653 if (isa<PHINode>(OldPtr))
3654 IRB.SetInsertPoint(OldPtr->getParent(),
3655 OldPtr->getParent()->getFirstInsertionPt());
3656 else
3657 IRB.SetInsertPoint(OldPtr);
3658 IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());
3659
3660 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3661 // Replace the operands which were using the old pointer.
3662 std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
3663
3664 LLVM_DEBUG(dbgs() << " to: " << PN << "\n");
3665 deleteIfTriviallyDead(OldPtr);
3666
3667 // Fix the alignment of any loads or stores using this PHI node.
3668 fixLoadStoreAlign(PN);
3669
3670 // PHIs can't be promoted on their own, but often can be speculated. We
3671 // check the speculation outside of the rewriter so that we see the
3672 // fully-rewritten alloca.
3673 PHIUsers.insert(&PN);
3674 return true;
3675 }
3676
3677 bool visitSelectInst(SelectInst &SI) {
3678 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3679 assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
3680 "Pointer isn't an operand!");
3681 assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
3682 assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
3683
3684 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3685 // Replace the operands which were using the old pointer.
3686 if (SI.getOperand(1) == OldPtr)
3687 SI.setOperand(1, NewPtr);
3688 if (SI.getOperand(2) == OldPtr)
3689 SI.setOperand(2, NewPtr);
3690
3691 LLVM_DEBUG(dbgs() << " to: " << SI << "\n");
3692 deleteIfTriviallyDead(OldPtr);
3693
3694 // Fix the alignment of any loads or stores using this select.
3695 fixLoadStoreAlign(SI);
3696
3697 // Selects can't be promoted on their own, but often can be speculated. We
3698 // check the speculation outside of the rewriter so that we see the
3699 // fully-rewritten alloca.
3700 SelectUsers.insert(&SI);
3701 return true;
3702 }
3703};
3704
3705/// Visitor to rewrite aggregate loads and stores as scalar.
3706///
3707/// This pass aggressively rewrites all aggregate loads and stores on
3708/// a particular pointer (or any pointer derived from it which we can identify)
3709/// with scalar loads and stores.
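///
/// For example, an aggregate load such as
///   %v = load { i32, float }, ptr %p
/// is rewritten, roughly, into per-field operations along the lines of
///   %v.fca.0.gep = getelementptr inbounds { i32, float }, ptr %p, i32 0, i32 0
///   %v.fca.0.load = load i32, ptr %v.fca.0.gep
///   %v.fca.0.insert = insertvalue { i32, float } poison, i32 %v.fca.0.load, 0
///   %v.fca.1.gep = getelementptr inbounds { i32, float }, ptr %p, i32 0, i32 1
///   %v.fca.1.load = load float, ptr %v.fca.1.gep
///   %v.fca.1.insert = insertvalue { i32, float } %v.fca.0.insert, float %v.fca.1.load, 1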
3710class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
3711 // Befriend the base class so it can delegate to private visit methods.
3712 friend class InstVisitor<AggLoadStoreRewriter, bool>;
3713
3714 /// Queue of pointer uses to analyze and potentially rewrite.
3715 SmallVector<Use *, 8> Queue;
3716
3717 /// Set to prevent us from cycling with phi nodes and loops.
3718 SmallPtrSet<User *, 8> Visited;
3719
3720 /// The current pointer use being rewritten. This is used to dig up the used
3721 /// value (as opposed to the user).
3722 Use *U = nullptr;
3723
3724 /// Used to calculate offsets, and hence alignment, of subobjects.
3725 const DataLayout &DL;
3726
3727 IRBuilderTy &IRB;
3728
3729public:
3730 AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
3731 : DL(DL), IRB(IRB) {}
3732
3733 /// Rewrite loads and stores through a pointer and all pointers derived from
3734 /// it.
3735 bool rewrite(Instruction &I) {
3736 LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
3737 enqueueUsers(I);
3738 bool Changed = false;
3739 while (!Queue.empty()) {
3740 U = Queue.pop_back_val();
3741 Changed |= visit(cast<Instruction>(U->getUser()));
3742 }
3743 return Changed;
3744 }
3745
3746private:
3747 /// Enqueue all the users of the given instruction for further processing.
3748 /// This uses a set to de-duplicate users.
3749 void enqueueUsers(Instruction &I) {
3750 for (Use &U : I.uses())
3751 if (Visited.insert(U.getUser()).second)
3752 Queue.push_back(&U);
3753 }
3754
3755 // Conservative default is to not rewrite anything.
3756 bool visitInstruction(Instruction &I) { return false; }
3757
3758 /// Generic recursive split emission class.
3759 template <typename Derived> class OpSplitter {
3760 protected:
3761 /// The builder used to form new instructions.
3762 IRBuilderTy &IRB;
3763
3764 /// The indices to be used with insert- or extractvalue to select the
3765 /// appropriate value within the aggregate.
3766 SmallVector<unsigned, 4> Indices;
3767
3768 /// The indices to a GEP instruction which will move Ptr to the correct slot
3769 /// within the aggregate.
3770 SmallVector<Value *, 4> GEPIndices;
3771
3772 /// The base pointer of the original op, used as a base for GEPing the
3773 /// split operations.
3774 Value *Ptr;
3775
3776 /// The base pointee type being GEPed into.
3777 Type *BaseTy;
3778
3779 /// Known alignment of the base pointer.
3780 Align BaseAlign;
3781
3782 /// To calculate offset of each component so we can correctly deduce
3783 /// alignments.
3784 const DataLayout &DL;
3785
3786 /// Initialize the splitter with an insertion point, Ptr and start with a
3787 /// single zero GEP index.
3788 OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
3789 Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
3790 : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
3791 BaseAlign(BaseAlign), DL(DL) {
3792 IRB.SetInsertPoint(InsertionPoint);
3793 }
3794
3795 public:
3796 /// Generic recursive split emission routine.
3797 ///
3798 /// This method recursively splits an aggregate op (load or store) into
3799 /// scalar or vector ops. It splits recursively until it hits a single value
3800 /// and emits that single value operation via the template argument.
3801 ///
3802 /// The logic of this routine relies on GEPs and insertvalue and
3803 /// extractvalue all operating with the same fundamental index list, merely
3804 /// formatted differently (GEPs need actual values).
3805 ///
3806 /// \param Ty The type being split recursively into smaller ops.
3807 /// \param Agg The aggregate value being built up or stored, depending on
3808 /// whether this is splitting a load or a store respectively.
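///
/// For example, for an aggregate of type { i32, [2 x float] }, the leaf at
/// Indices = {1, 0} is addressed with extractvalue/insertvalue indices 1, 0
/// and with GEP indices 0, 1, 0 (the leading 0 indexes the pointer itself).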
3809 void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
3810 if (Ty->isSingleValueType()) {
3811 unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
3812 return static_cast<Derived *>(this)->emitFunc(
3813 Ty, Agg, commonAlignment(BaseAlign, Offset), Name);
3814 }
3815
3816 if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
3817 unsigned OldSize = Indices.size();
3818 (void)OldSize;
3819 for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
3820 ++Idx) {
3821 assert(Indices.size() == OldSize && "Did not return to the old size");
3822 Indices.push_back(Idx);
3823 GEPIndices.push_back(IRB.getInt32(Idx));
3824 emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
3825 GEPIndices.pop_back();
3826 Indices.pop_back();
3827 }
3828 return;
3829 }
3830
3831 if (StructType *STy = dyn_cast<StructType>(Ty)) {
3832 unsigned OldSize = Indices.size();
3833 (void)OldSize;
3834 for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
3835 ++Idx) {
3836 assert(Indices.size() == OldSize && "Did not return to the old size");
3837 Indices.push_back(Idx);
3838 GEPIndices.push_back(IRB.getInt32(Idx));
3839 emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
3840 GEPIndices.pop_back();
3841 Indices.pop_back();
3842 }
3843 return;
3844 }
3845
3846 llvm_unreachable("Only arrays and structs are aggregate loadable types");
3847 }
3848 };
3849
3850 struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
3851 AAMDNodes AATags;
3852 // A vector to hold the split components that we want to emit
3853 // separate fake uses for.
3854 SmallVector<Value *, 4> Components;
3855 // A vector to hold all the fake uses of the struct that we are splitting.
3856 // Usually there should only be one, but we are handling the general case.
3857 SmallVector<Instruction *, 4> FakeUses;
3858
3859 LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
3860 AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
3861 IRBuilderTy &IRB)
3862 : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,
3863 IRB),
3864 AATags(AATags) {}
3865
3866 /// Emit a leaf load of a single value. This is called at the leaves of the
3867 /// recursive emission to actually load values.
3868 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
3869 assert(Ty->isSingleValueType());
3870 // Load the single value and insert it using the indices.
3871 Value *GEP =
3872 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
3873 LoadInst *Load =
3874 IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
3875
3876 APInt Offset(
3877 DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
3878 if (AATags &&
3879 GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
3880 Load->setAAMetadata(
3881 AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL));
3882 // Record the load so we can generate a fake use for this aggregate
3883 // component.
3884 Components.push_back(Load);
3885
3886 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
3887 LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
3888 }
3889
3890 // Stash the fake uses that use the value generated by this instruction.
3891 void recordFakeUses(LoadInst &LI) {
3892 for (Use &U : LI.uses())
3893 if (auto *II = dyn_cast<IntrinsicInst>(U.getUser()))
3894 if (II->getIntrinsicID() == Intrinsic::fake_use)
3895 FakeUses.push_back(II);
3896 }
3897
3898 // Replace all fake uses of the aggregate with a series of fake uses, one
3899 // for each split component.
3900 void emitFakeUses() {
3901 for (Instruction *I : FakeUses) {
3902 IRB.SetInsertPoint(I);
3903 for (auto *V : Components)
3904 IRB.CreateIntrinsic(Intrinsic::fake_use, {V});
3905 I->eraseFromParent();
3906 }
3907 }
3908 };
3909
3910 bool visitLoadInst(LoadInst &LI) {
3911 assert(LI.getPointerOperand() == *U);
3912 if (!LI.isSimple() || LI.getType()->isSingleValueType())
3913 return false;
3914
3915 // We have an aggregate being loaded, split it apart.
3916 LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
3917 LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
3918 getAdjustedAlignment(&LI, 0), DL, IRB);
3919 Splitter.recordFakeUses(LI);
3920 Value *V = PoisonValue::get(LI.getType());
3921 Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
3922 Splitter.emitFakeUses();
3923 Visited.erase(&LI);
3924 LI.replaceAllUsesWith(V);
3925 LI.eraseFromParent();
3926 return true;
3927 }
3928
3929 struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
3930 StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
3931 AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
3932 const DataLayout &DL, IRBuilderTy &IRB)
3933 : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
3934 DL, IRB),
3935 AATags(AATags), AggStore(AggStore) {}
3936 AAMDNodes AATags;
3937 StoreInst *AggStore;
3938 /// Emit a leaf store of a single value. This is called at the leaves of the
3939 /// recursive emission to actually produce stores.
3940 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
3941 assert(Ty->isSingleValueType());
3942 // Extract the single value and store it using the indices.
3943 //
3944 // The gep and extractvalue values are factored out of the CreateStore
3945 // call to make the output independent of the argument evaluation order.
3946 Value *ExtractValue =
3947 IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
3948 Value *InBoundsGEP =
3949 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
3950 StoreInst *Store =
3951 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
3952
3953 APInt Offset(
3954 DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
3955 GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset);
3956 if (AATags) {
3957 Store->setAAMetadata(AATags.adjustForAccess(
3958 Offset.getZExtValue(), ExtractValue->getType(), DL));
3959 }
3960
3961 // migrateDebugInfo requires the base Alloca. Walk to it from this gep.
3962 // If we cannot (because there's an intervening non-const or unbounded
3963 // gep) then we wouldn't expect to see dbg.assign intrinsics linked to
3964 // this instruction.
3965 Value *Base = AggStore->getPointerOperand()->stripInBoundsOffsets();
3966 if (auto *OldAI = dyn_cast<AllocaInst>(Base)) {
3967 uint64_t SizeInBits =
3968 DL.getTypeSizeInBits(Store->getValueOperand()->getType());
3969 migrateDebugInfo(OldAI, /*IsSplit*/ true, Offset.getZExtValue() * 8,
3970 SizeInBits, AggStore, Store,
3971 Store->getPointerOperand(), Store->getValueOperand(),
3972 DL);
3973 } else {
3974 assert(at::getDVRAssignmentMarkers(Store).empty() &&
3975 "AT: unexpected debug.assign linked to store through "
3976 "unbounded GEP");
3977 }
3978 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3979 }
3980 };
3981
3982 bool visitStoreInst(StoreInst &SI) {
3983 if (!SI.isSimple() || SI.getPointerOperand() != *U)
3984 return false;
3985 Value *V = SI.getValueOperand();
3986 if (V->getType()->isSingleValueType())
3987 return false;
3988
3989 // We have an aggregate being stored, split it apart.
3990 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3991 StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,
3992 getAdjustedAlignment(&SI, 0), DL, IRB);
3993 Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
3994 Visited.erase(&SI);
3995 // The stores replacing SI each have markers describing fragments of the
3996 // assignment so delete the assignment markers linked to SI.
3997 at::deleteAssignmentMarkers(&SI);
3998 SI.eraseFromParent();
3999 return true;
4000 }
4001
4002 bool visitBitCastInst(BitCastInst &BC) {
4003 enqueueUsers(BC);
4004 return false;
4005 }
4006
4007 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
4008 enqueueUsers(ASC);
4009 return false;
4010 }
4011
4012 // Unfold gep (select cond, ptr1, ptr2), idx
4013 // => select cond, gep(ptr1, idx), gep(ptr2, idx)
4014 // and gep ptr, (select cond, idx1, idx2)
4015 // => select cond, gep(ptr, idx1), gep(ptr, idx2)
4016 // We also allow for i1 zext indices, which are equivalent to selects.
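// For example, gep ptr, (zext i1 %c) is handled as if it were
// gep ptr, (select %c, 1, 0) and becomes select %c, gep(ptr, 1), gep(ptr, 0).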
4017 bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
4018 // Check whether the GEP has exactly one select operand and all indices
4019 // will become constant after the transform.
4020 Instruction *Sel = dyn_cast<SelectInst>(GEPI.getPointerOperand());
4021 for (Value *Op : GEPI.indices()) {
4022 if (auto *SI = dyn_cast<SelectInst>(Op)) {
4023 if (Sel)
4024 return false;
4025
4026 Sel = SI;
4027 if (!isa<ConstantInt>(SI->getTrueValue()) ||
4028 !isa<ConstantInt>(SI->getFalseValue()))
4029 return false;
4030 continue;
4031 }
4032 if (auto *ZI = dyn_cast<ZExtInst>(Op)) {
4033 if (Sel)
4034 return false;
4035 Sel = ZI;
4036 if (!ZI->getSrcTy()->isIntegerTy(1))
4037 return false;
4038 continue;
4039 }
4040
4041 if (!isa<ConstantInt>(Op))
4042 return false;
4043 }
4044
4045 if (!Sel)
4046 return false;
4047
4048 LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):\n";
4049 dbgs() << " original: " << *Sel << "\n";
4050 dbgs() << " " << GEPI << "\n";);
4051
4052 auto GetNewOps = [&](Value *SelOp) {
4053 SmallVector<Value *> NewOps;
4054 for (Value *Op : GEPI.operands())
4055 if (Op == Sel)
4056 NewOps.push_back(SelOp);
4057 else
4058 NewOps.push_back(Op);
4059 return NewOps;
4060 };
4061
4062 Value *Cond, *True, *False;
4063 if (auto *SI = dyn_cast<SelectInst>(Sel)) {
4064 Cond = SI->getCondition();
4065 True = SI->getTrueValue();
4066 False = SI->getFalseValue();
4067 } else {
4068 Cond = Sel->getOperand(0);
4069 True = ConstantInt::get(Sel->getType(), 1);
4070 False = ConstantInt::get(Sel->getType(), 0);
4071 }
4072 SmallVector<Value *> TrueOps = GetNewOps(True);
4073 SmallVector<Value *> FalseOps = GetNewOps(False);
4074
4075 IRB.SetInsertPoint(&GEPI);
4076 GEPNoWrapFlags NW = GEPI.getNoWrapFlags();
4077
4078 Type *Ty = GEPI.getSourceElementType();
4079 Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),
4080 True->getName() + ".sroa.gep", NW);
4081
4082 Value *NFalse =
4083 IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
4084 False->getName() + ".sroa.gep", NW);
4085
4086 Value *NSel =
4087 IRB.CreateSelect(Cond, NTrue, NFalse, Sel->getName() + ".sroa.sel");
4088 Visited.erase(&GEPI);
4089 GEPI.replaceAllUsesWith(NSel);
4090 GEPI.eraseFromParent();
4091 Instruction *NSelI = cast<Instruction>(NSel);
4092 Visited.insert(NSelI);
4093 enqueueUsers(*NSelI);
4094
4095 LLVM_DEBUG(dbgs() << " to: " << *NTrue << "\n";
4096 dbgs() << " " << *NFalse << "\n";
4097 dbgs() << " " << *NSel << "\n";);
4098
4099 return true;
4100 }
4101
4102 // Unfold gep (phi ptr1, ptr2), idx
4103 // => phi ((gep ptr1, idx), (gep ptr2, idx))
4104 // and gep ptr, (phi idx1, idx2)
4105 // => phi ((gep ptr, idx1), (gep ptr, idx2))
4106 bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
4107 // To prevent infinitely expanding recursive phis, bail if the GEP pointer
4108 // operand (looking through the phi if it is the phi we want to unfold) is
4109 // an instruction besides a static alloca.
4110 PHINode *Phi = dyn_cast<PHINode>(GEPI.getPointerOperand());
4111 auto IsInvalidPointerOperand = [](Value *V) {
4112 if (!isa<Instruction>(V))
4113 return false;
4114 if (auto *AI = dyn_cast<AllocaInst>(V))
4115 return !AI->isStaticAlloca();
4116 return true;
4117 };
4118 if (Phi) {
4119 if (any_of(Phi->operands(), IsInvalidPointerOperand))
4120 return false;
4121 } else {
4122 if (IsInvalidPointerOperand(GEPI.getPointerOperand()))
4123 return false;
4124 }
4125 // Check whether the GEP has exactly one phi operand (including the pointer
4126 // operand) and all indices will become constant after the transform.
4127 for (Value *Op : GEPI.indices()) {
4128 if (auto *SI = dyn_cast<PHINode>(Op)) {
4129 if (Phi)
4130 return false;
4131
4132 Phi = SI;
4133 if (!all_of(Phi->incoming_values(),
4134 [](Value *V) { return isa<ConstantInt>(V); }))
4135 return false;
4136 continue;
4137 }
4138
4139 if (!isa<ConstantInt>(Op))
4140 return false;
4141 }
4142
4143 if (!Phi)
4144 return false;
4145
4146 LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):\n";
4147 dbgs() << " original: " << *Phi << "\n";
4148 dbgs() << " " << GEPI << "\n";);
4149
4150 auto GetNewOps = [&](Value *PhiOp) {
4151 SmallVector<Value *> NewOps;
4152 for (Value *Op : GEPI.operands())
4153 if (Op == Phi)
4154 NewOps.push_back(PhiOp);
4155 else
4156 NewOps.push_back(Op);
4157 return NewOps;
4158 };
4159
4160 IRB.SetInsertPoint(Phi);
4161 PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
4162 Phi->getName() + ".sroa.phi");
4163
4164 Type *SourceTy = GEPI.getSourceElementType();
4165 // We only handle arguments, constants, and static allocas here, so we can
4166 // insert GEPs at the end of the entry block.
4167 IRB.SetInsertPoint(GEPI.getFunction()->getEntryBlock().getTerminator());
4168 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
4169 Value *Op = Phi->getIncomingValue(I);
4170 BasicBlock *BB = Phi->getIncomingBlock(I);
4171 Value *NewGEP;
4172 if (int NI = NewPhi->getBasicBlockIndex(BB); NI >= 0) {
4173 NewGEP = NewPhi->getIncomingValue(NI);
4174 } else {
4175 SmallVector<Value *> NewOps = GetNewOps(Op);
4176 NewGEP =
4177 IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
4178 Phi->getName() + ".sroa.gep", GEPI.getNoWrapFlags());
4179 }
4180 NewPhi->addIncoming(NewGEP, BB);
4181 }
4182
4183 Visited.erase(&GEPI);
4184 GEPI.replaceAllUsesWith(NewPhi);
4185 GEPI.eraseFromParent();
4186 Visited.insert(NewPhi);
4187 enqueueUsers(*NewPhi);
4188
4189 LLVM_DEBUG(dbgs() << " to: ";
4190 for (Value *In
4191 : NewPhi->incoming_values()) dbgs()
4192 << "\n " << *In;
4193 dbgs() << "\n " << *NewPhi << '\n');
4194
4195 return true;
4196 }
4197
4198 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
4199 if (unfoldGEPSelect(GEPI))
4200 return true;
4201
4202 if (unfoldGEPPhi(GEPI))
4203 return true;
4204
4205 enqueueUsers(GEPI);
4206 return false;
4207 }
4208
4209 bool visitPHINode(PHINode &PN) {
4210 enqueueUsers(PN);
4211 return false;
4212 }
4213
4214 bool visitSelectInst(SelectInst &SI) {
4215 enqueueUsers(SI);
4216 return false;
4217 }
4218};
4219
4220} // end anonymous namespace
4221
4222/// Strip aggregate type wrapping.
4223///
4224/// This removes no-op aggregate types wrapping an underlying type. It will
4225/// strip as many layers of types as it can without changing either the type
4226/// size or the allocated size.
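///
/// For example, { { i32 } } and [1 x i32] both strip down to i32, while
/// { i32, i32 } is left unchanged because removing the wrapper would change
/// the size.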
4227 static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
4228 if (Ty->isSingleValueType())
4229 return Ty;
4230
4231 uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();
4232 uint64_t TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
4233
4234 Type *InnerTy;
4235 if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
4236 InnerTy = ArrTy->getElementType();
4237 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
4238 const StructLayout *SL = DL.getStructLayout(STy);
4239 unsigned Index = SL->getElementContainingOffset(0);
4240 InnerTy = STy->getElementType(Index);
4241 } else {
4242 return Ty;
4243 }
4244
4245 if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
4246 TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())
4247 return Ty;
4248
4249 return stripAggregateTypeWrapping(DL, InnerTy);
4250}
4251
4252/// Try to find a partition of the aggregate type passed in for a given
4253/// offset and size.
4254///
4255/// This recurses through the aggregate type and tries to compute a subtype
4256/// based on the offset and size. When the offset and size span a sub-section
4257/// of an array, it will even compute a new array type for that sub-section,
4258/// and the same for structs.
4259///
4260/// Note that this routine is very strict and tries to find a partition of the
4261/// type which produces the *exact* right offset and size. It is not forgiving
4262 /// when the size or offset causes either end of the type-based partition to be off.
4263/// Also, this is a best-effort routine. It is reasonable to give up and not
4264/// return a type if necessary.
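///
/// For example, given { i32, i32, i32 } with Offset = 4: a Size of 4 yields
/// i32, a Size of 8 yields the sub-struct { i32, i32 }, and a Size of 6
/// yields nullptr because no element boundary produces that exact size.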
4265 static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
4266 uint64_t Size) {
4267 if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
4268 return stripAggregateTypeWrapping(DL, Ty);
4269 if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
4270 (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)
4271 return nullptr;
4272
4273 if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
4274 Type *ElementTy;
4275 uint64_t TyNumElements;
4276 if (auto *AT = dyn_cast<ArrayType>(Ty)) {
4277 ElementTy = AT->getElementType();
4278 TyNumElements = AT->getNumElements();
4279 } else {
4280 // FIXME: This isn't right for vectors with non-byte-sized or
4281 // non-power-of-two sized elements.
4282 auto *VT = cast<FixedVectorType>(Ty);
4283 ElementTy = VT->getElementType();
4284 TyNumElements = VT->getNumElements();
4285 }
4286 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4287 uint64_t NumSkippedElements = Offset / ElementSize;
4288 if (NumSkippedElements >= TyNumElements)
4289 return nullptr;
4290 Offset -= NumSkippedElements * ElementSize;
4291
4292 // First check if we need to recurse.
4293 if (Offset > 0 || Size < ElementSize) {
4294 // Bail if the partition ends in a different array element.
4295 if ((Offset + Size) > ElementSize)
4296 return nullptr;
4297 // Recurse through the element type trying to peel off offset bytes.
4298 return getTypePartition(DL, ElementTy, Offset, Size);
4299 }
4300 assert(Offset == 0);
4301
4302 if (Size == ElementSize)
4303 return stripAggregateTypeWrapping(DL, ElementTy);
4304 assert(Size > ElementSize);
4305 uint64_t NumElements = Size / ElementSize;
4306 if (NumElements * ElementSize != Size)
4307 return nullptr;
4308 return ArrayType::get(ElementTy, NumElements);
4309 }
4310
4311 StructType *STy = dyn_cast<StructType>(Ty);
4312 if (!STy)
4313 return nullptr;
4314
4315 const StructLayout *SL = DL.getStructLayout(STy);
4316
4317 if (SL->getSizeInBits().isScalable())
4318 return nullptr;
4319
4320 if (Offset >= SL->getSizeInBytes())
4321 return nullptr;
4322 uint64_t EndOffset = Offset + Size;
4323 if (EndOffset > SL->getSizeInBytes())
4324 return nullptr;
4325
4326 unsigned Index = SL->getElementContainingOffset(Offset);
4327 Offset -= SL->getElementOffset(Index);
4328
4329 Type *ElementTy = STy->getElementType(Index);
4330 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4331 if (Offset >= ElementSize)
4332 return nullptr; // The offset points into alignment padding.
4333
4334 // See if any partition must be contained by the element.
4335 if (Offset > 0 || Size < ElementSize) {
4336 if ((Offset + Size) > ElementSize)
4337 return nullptr;
4338 return getTypePartition(DL, ElementTy, Offset, Size);
4339 }
4340 assert(Offset == 0);
4341
4342 if (Size == ElementSize)
4343 return stripAggregateTypeWrapping(DL, ElementTy);
4344
4345 StructType::element_iterator EI = STy->element_begin() + Index,
4346 EE = STy->element_end();
4347 if (EndOffset < SL->getSizeInBytes()) {
4348 unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
4349 if (Index == EndIndex)
4350 return nullptr; // Within a single element and its padding.
4351
4352 // Don't try to form "natural" types if the elements don't line up with the
4353 // expected size.
4354 // FIXME: We could potentially recurse down through the last element in the
4355 // sub-struct to find a natural end point.
4356 if (SL->getElementOffset(EndIndex) != EndOffset)
4357 return nullptr;
4358
4359 assert(Index < EndIndex);
4360 EE = STy->element_begin() + EndIndex;
4361 }
4362
4363 // Try to build up a sub-structure.
4364 StructType *SubTy =
4365 StructType::get(STy->getContext(), ArrayRef(EI, EE), STy->isPacked());
4366 const StructLayout *SubSL = DL.getStructLayout(SubTy);
4367 if (Size != SubSL->getSizeInBytes())
4368 return nullptr; // The sub-struct doesn't have quite the size needed.
4369
4370 return SubTy;
4371}
4372
4373/// Pre-split loads and stores to simplify rewriting.
4374///
4375/// We want to break up the splittable load+store pairs as much as
4376/// possible. This is important to do as a preprocessing step, as once we
4377/// start rewriting the accesses to partitions of the alloca we lose the
4378/// necessary information to correctly split apart paired loads and stores
4379/// which both point into this alloca. The case to consider is something like
4380/// the following:
4381///
4382/// %a = alloca [12 x i8]
4383/// %gep1 = getelementptr i8, ptr %a, i32 0
4384/// %gep2 = getelementptr i8, ptr %a, i32 4
4385/// %gep3 = getelementptr i8, ptr %a, i32 8
4386/// store float 0.0, ptr %gep1
4387/// store float 1.0, ptr %gep2
4388/// %v = load i64, ptr %gep1
4389/// store i64 %v, ptr %gep2
4390/// %f1 = load float, ptr %gep2
4391/// %f2 = load float, ptr %gep3
4392///
4393/// Here we want to form 3 partitions of the alloca, each 4 bytes large, and
4394/// promote everything so we recover the 2 SSA values that should have been
4395/// there all along.
4396///
4397/// \returns true if any changes are made.
4398bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
4399 LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
4400
4401 // Track the loads and stores which are candidates for pre-splitting here, in
4402 // the order they first appear during the partition scan. These give stable
4403 // iteration order and a basis for tracking which loads and stores we
4404 // actually split.
4405 SmallVector<LoadInst *, 4> Loads;
4406 SmallVector<StoreInst *, 4> Stores;
4407
4408 // We need to accumulate the splits required of each load or store where we
4409 // can find them via a direct lookup. This is important to cross-check loads
4410 // and stores against each other. We also track the slice so that we can kill
4411 // all the slices that end up split.
4412 struct SplitOffsets {
4413 Slice *S;
4414 std::vector<uint64_t> Splits;
4415 };
4416 SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
4417
4418 // Track loads out of this alloca which cannot, for any reason, be pre-split.
4419 // This is important as we also cannot pre-split stores of those loads!
4420 // FIXME: This is all pretty gross. It means that we can be more aggressive
4421 // in pre-splitting when the load feeding the store happens to come from
4422 // a separate alloca. Put another way, the effectiveness of SROA would be
4423 // decreased by a frontend which just concatenated all of its local allocas
4424 // into one big flat alloca. But defeating such patterns is exactly the job
4425 // SROA is tasked with! Sadly, to not have this discrepancy we would have to
4426 // change store pre-splitting to actually force pre-splitting of the load
4427 // that feeds it *and all stores*. That makes pre-splitting much harder, but
4428 // maybe it would make it more principled?
4429 SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
4430
4431 LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
4432 for (auto &P : AS.partitions()) {
4433 for (Slice &S : P) {
4434 Instruction *I = cast<Instruction>(S.getUse()->getUser());
4435 if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
4436 // If this is a load we have to track that it can't participate in any
4437 // pre-splitting. If this is a store of a load we have to track that
4438 // that load also can't participate in any pre-splitting.
4439 if (auto *LI = dyn_cast<LoadInst>(I))
4440 UnsplittableLoads.insert(LI);
4441 else if (auto *SI = dyn_cast<StoreInst>(I))
4442 if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
4443 UnsplittableLoads.insert(LI);
4444 continue;
4445 }
4446 assert(P.endOffset() > S.beginOffset() &&
4447 "Empty or backwards partition!");
4448
4449 // Determine if this is a pre-splittable slice.
4450 if (auto *LI = dyn_cast<LoadInst>(I)) {
4451 assert(!LI->isVolatile() && "Cannot split volatile loads!");
4452
4453 // The load must be used exclusively to store into other pointers for
4454 // us to be able to arbitrarily pre-split it. The stores must also be
4455 // simple to avoid changing semantics.
4456 auto IsLoadSimplyStored = [](LoadInst *LI) {
4457 for (User *LU : LI->users()) {
4458 auto *SI = dyn_cast<StoreInst>(LU);
4459 if (!SI || !SI->isSimple())
4460 return false;
4461 }
4462 return true;
4463 };
4464 if (!IsLoadSimplyStored(LI)) {
4465 UnsplittableLoads.insert(LI);
4466 continue;
4467 }
4468
4469 Loads.push_back(LI);
4470 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
4471 if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
4472 // Skip stores *of* pointers. FIXME: This shouldn't even be possible!
4473 continue;
4474 auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
4475 if (!StoredLoad || !StoredLoad->isSimple())
4476 continue;
4477 assert(!SI->isVolatile() && "Cannot split volatile stores!");
4478
4479 Stores.push_back(SI);
4480 } else {
4481 // Other uses cannot be pre-split.
4482 continue;
4483 }
4484
4485 // Record the initial split.
4486 LLVM_DEBUG(dbgs() << " Candidate: " << *I << "\n");
4487 auto &Offsets = SplitOffsetsMap[I];
4488 assert(Offsets.Splits.empty() &&
4489 "Should not have splits the first time we see an instruction!");
4490 Offsets.S = &S;
4491 Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
4492 }
4493
4494 // Now scan the already split slices, and add a split for any of them which
4495 // we're going to pre-split.
4496 for (Slice *S : P.splitSliceTails()) {
4497 auto SplitOffsetsMapI =
4498 SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
4499 if (SplitOffsetsMapI == SplitOffsetsMap.end())
4500 continue;
4501 auto &Offsets = SplitOffsetsMapI->second;
4502
4503 assert(Offsets.S == S && "Found a mismatched slice!");
4504 assert(!Offsets.Splits.empty() &&
4505 "Cannot have an empty set of splits on the second partition!");
4506 assert(Offsets.Splits.back() ==
4507 P.beginOffset() - Offsets.S->beginOffset() &&
4508 "Previous split does not end where this one begins!");
4509
4510 // Record each split. The last partition's end isn't needed as the size
4511 // of the slice dictates that.
4512 if (S->endOffset() > P.endOffset())
4513 Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
4514 }
4515 }
4516
4517 // We may have split loads where some of their stores are split stores. For
4518 // such loads and stores, we can only pre-split them if their splits exactly
4519 // match relative to their starting offset. We have to verify this prior to
4520 // any rewriting.
4521 llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
4522 // Lookup the load we are storing in our map of split
4523 // offsets.
4524 auto *LI = cast<LoadInst>(SI->getValueOperand());
4525 // If it was completely unsplittable, then we're done,
4526 // and this store can't be pre-split.
4527 if (UnsplittableLoads.count(LI))
4528 return true;
4529
4530 auto LoadOffsetsI = SplitOffsetsMap.find(LI);
4531 if (LoadOffsetsI == SplitOffsetsMap.end())
4532 return false; // Unrelated loads are definitely safe.
4533 auto &LoadOffsets = LoadOffsetsI->second;
4534
4535 // Now lookup the store's offsets.
4536 auto &StoreOffsets = SplitOffsetsMap[SI];
4537
4538 // If the relative offsets of each split in the load and
4539 // store match exactly, then we can split them and we
4540 // don't need to remove them here.
4541 if (LoadOffsets.Splits == StoreOffsets.Splits)
4542 return false;
4543
4544 LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"
4545 << " " << *LI << "\n"
4546 << " " << *SI << "\n");
4547
4548 // We've found a store and load that we need to split
4549 // with mismatched relative splits. Just give up on them
4550 // and remove both instructions from our list of
4551 // candidates.
4552 UnsplittableLoads.insert(LI);
4553 return true;
4554 });
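  // For example, if a load is split at relative offsets {4, 8} but one store
  // of that load is only split at {8}, the pieces cannot line up: the store is
  // dropped here and the load is marked unsplittable and filtered out below.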
4555 // Now we have to go *back* through all the stores, because a later store may
4556 // have caused an earlier store's load to become unsplittable and if it is
4557 // unsplittable for the later store, then we can't rely on it being split in
4558 // the earlier store either.
4559 llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
4560 auto *LI = cast<LoadInst>(SI->getValueOperand());
4561 return UnsplittableLoads.count(LI);
4562 });
4563 // Once we've established all the loads that can't be split for some reason,
4564 // filter any that made it into our list out.
4565 llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
4566 return UnsplittableLoads.count(LI);
4567 });
4568
4569 // If no loads or stores are left, there is no pre-splitting to be done for
4570 // this alloca.
4571 if (Loads.empty() && Stores.empty())
4572 return false;
4573
4574 // From here on, we can't fail and will be building new accesses, so rig up
4575 // an IR builder.
4576 IRBuilderTy IRB(&AI);
4577
4578 // Collect the new slices which we will merge into the alloca slices.
4579 SmallVector<Slice, 4> NewSlices;
4580
4581 // Track any allocas we end up splitting loads and stores for so we iterate
4582 // on them.
4583 SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
4584
4585 // At this point, we have collected all of the loads and stores we can
4586 // pre-split, and the specific splits needed for them. We actually do the
4587 // splitting in a specific order in order to handle when one of the loads in
4588 // splitting in a specific order so that we can handle the case where one of
4589 // the loads is the value operand of one of the stores.
4590 // First, we rewrite all of the split loads, and just accumulate each split
4591 // load in a parallel structure. We also build the slices for them and append
4592 // them to the alloca slices.
4594 std::vector<LoadInst *> SplitLoads;
4595 const DataLayout &DL = AI.getDataLayout();
4596 for (LoadInst *LI : Loads) {
4597 SplitLoads.clear();
4598
4599 auto &Offsets = SplitOffsetsMap[LI];
4600 unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
4601 assert(LI->getType()->getIntegerBitWidth() % 8 == 0 &&
4602 "Load must have type size equal to store size");
4603 assert(LI->getType()->getIntegerBitWidth() / 8 >= SliceSize &&
4604 "Load must be >= slice size");
4605
4606 uint64_t BaseOffset = Offsets.S->beginOffset();
4607 assert(BaseOffset + SliceSize > BaseOffset &&
4608 "Cannot represent alloca access size using 64-bit integers!");
4609
4610 Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
4611 IRB.SetInsertPoint(LI);
4612
4613 LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
4614
4615 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
4616 int Idx = 0, Size = Offsets.Splits.size();
4617 for (;;) {
4618 auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
4619 auto AS = LI->getPointerAddressSpace();
4620 auto *PartPtrTy = LI->getPointerOperandType();
4621 LoadInst *PLoad = IRB.CreateAlignedLoad(
4622 PartTy,
4623 getAdjustedPtr(IRB, DL, BasePtr,
4624 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4625 PartPtrTy, BasePtr->getName() + "."),
4626 getAdjustedAlignment(LI, PartOffset),
4627 /*IsVolatile*/ false, LI->getName());
4628 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4629 LLVMContext::MD_access_group});
4630
4631 // Append this load onto the list of split loads so we can find it later
4632 // to rewrite the stores.
4633 SplitLoads.push_back(PLoad);
4634
4635 // Now build a new slice for the alloca.
4636 NewSlices.push_back(
4637 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4638 &PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
4639 /*IsSplittable*/ false));
4640 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
4641 << ", " << NewSlices.back().endOffset()
4642 << "): " << *PLoad << "\n");
4643
4644 // See if we've handled all the splits.
4645 if (Idx >= Size)
4646 break;
4647
4648 // Setup the next partition.
4649 PartOffset = Offsets.Splits[Idx];
4650 ++Idx;
4651 PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
4652 }
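    // Illustrative sketch: for an i128 load of a slice [0, 16) with
    // Splits == {8}, the loop above emits two i64 loads at byte offsets 0 and
    // 8 from the original pointer and records a new, unsplittable slice for
    // each of them.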
4653
4654 // Now that we have the split loads, do the slow walk over all uses of the
4655 // load and rewrite them as split stores, or save the split loads to use
4656 // below if the store is going to be split there anyways.
4657 bool DeferredStores = false;
4658 for (User *LU : LI->users()) {
4659 StoreInst *SI = cast<StoreInst>(LU);
4660 if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
4661 DeferredStores = true;
4662 LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SI
4663 << "\n");
4664 continue;
4665 }
4666
4667 Value *StoreBasePtr = SI->getPointerOperand();
4668 IRB.SetInsertPoint(SI);
4669 AAMDNodes AATags = SI->getAAMetadata();
4670
4671 LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
4672
4673 for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
4674 LoadInst *PLoad = SplitLoads[Idx];
4675 uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
4676 auto *PartPtrTy = SI->getPointerOperandType();
4677
4678 auto AS = SI->getPointerAddressSpace();
4679 StoreInst *PStore = IRB.CreateAlignedStore(
4680 PLoad,
4681 getAdjustedPtr(IRB, DL, StoreBasePtr,
4682 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4683 PartPtrTy, StoreBasePtr->getName() + "."),
4684 getAdjustedAlignment(SI, PartOffset),
4685 /*IsVolatile*/ false);
4686 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4687 LLVMContext::MD_access_group,
4688 LLVMContext::MD_DIAssignID});
4689
4690 if (AATags)
4691 PStore->setAAMetadata(
4692 AATags.adjustForAccess(PartOffset, PLoad->getType(), DL));
4693 LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
4694 }
4695
4696 // We want to immediately iterate on any allocas impacted by splitting
4697 // this store, and we have to track any promotable alloca (indicated by
4698 // a direct store) as needing to be resplit because it is no longer
4699 // promotable.
4700 if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
4701 ResplitPromotableAllocas.insert(OtherAI);
4702 Worklist.insert(OtherAI);
4703 } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4704 StoreBasePtr->stripInBoundsOffsets())) {
4705 Worklist.insert(OtherAI);
4706 }
4707
4708 // Mark the original store as dead.
4709 DeadInsts.push_back(SI);
4710 }
4711
4712 // Save the split loads if there are deferred stores among the users.
4713 if (DeferredStores)
4714 SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
4715
4716 // Mark the original load as dead and kill the original slice.
4717 DeadInsts.push_back(LI);
4718 Offsets.S->kill();
4719 }
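  // At this point every candidate load has been replaced by its split parts.
  // Stores that were not themselves candidates for splitting were rewritten
  // eagerly above; stores that are split candidates were deferred, with their
  // split loads stashed in SplitLoadsMap for the second pass below.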
4720
4721 // Second, we rewrite all of the split stores. At this point, we know that
4722 // all loads from this alloca have been split already. For stores of such
4723 // loads, we can simply look up the pre-existing split loads. For stores of
4724 // other loads, we split those loads first and then write split stores of
4725 // them.
4726 for (StoreInst *SI : Stores) {
4727 auto *LI = cast<LoadInst>(SI->getValueOperand());
4728 IntegerType *Ty = cast<IntegerType>(LI->getType());
4729 assert(Ty->getBitWidth() % 8 == 0);
4730 uint64_t StoreSize = Ty->getBitWidth() / 8;
4731 assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
4732
4733 auto &Offsets = SplitOffsetsMap[SI];
4734 assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
4735 "Slice size should always match load size exactly!");
4736 uint64_t BaseOffset = Offsets.S->beginOffset();
4737 assert(BaseOffset + StoreSize > BaseOffset &&
4738 "Cannot represent alloca access size using 64-bit integers!");
4739
4740 Value *LoadBasePtr = LI->getPointerOperand();
4741 Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());
4742
4743 LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n");
4744
4745 // Check whether we have an already split load.
4746 auto SplitLoadsMapI = SplitLoadsMap.find(LI);
4747 std::vector<LoadInst *> *SplitLoads = nullptr;
4748 if (SplitLoadsMapI != SplitLoadsMap.end()) {
4749 SplitLoads = &SplitLoadsMapI->second;
4750 assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
4751 "Too few split loads for the number of splits in the store!");
4752 } else {
4753 LLVM_DEBUG(dbgs() << " of load: " << *LI << "\n");
4754 }
4755
4756 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
4757 int Idx = 0, Size = Offsets.Splits.size();
4758 for (;;) {
4759 auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
4760 auto *LoadPartPtrTy = LI->getPointerOperandType();
4761 auto *StorePartPtrTy = SI->getPointerOperandType();
4762
4763 // Either lookup a split load or create one.
4764 LoadInst *PLoad;
4765 if (SplitLoads) {
4766 PLoad = (*SplitLoads)[Idx];
4767 } else {
4768 IRB.SetInsertPoint(LI);
4769 auto AS = LI->getPointerAddressSpace();
4770 PLoad = IRB.CreateAlignedLoad(
4771 PartTy,
4772 getAdjustedPtr(IRB, DL, LoadBasePtr,
4773 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4774 LoadPartPtrTy, LoadBasePtr->getName() + "."),
4775 getAdjustedAlignment(LI, PartOffset),
4776 /*IsVolatile*/ false, LI->getName());
4777 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4778 LLVMContext::MD_access_group});
4779 }
4780
4781 // And store this partition.
4782 IRB.SetInsertPoint(SI);
4783 auto AS = SI->getPointerAddressSpace();
4784 StoreInst *PStore = IRB.CreateAlignedStore(
4785 PLoad,
4786 getAdjustedPtr(IRB, DL, StoreBasePtr,
4787 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4788 StorePartPtrTy, StoreBasePtr->getName() + "."),
4789 getAdjustedAlignment(SI, PartOffset),
4790 /*IsVolatile*/ false);
4791 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4792 LLVMContext::MD_access_group});
4793
4794 // Now build a new slice for the alloca.
4795 NewSlices.push_back(
4796 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4797 &PStore->getOperandUse(PStore->getPointerOperandIndex()),
4798 /*IsSplittable*/ false));
4799 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
4800 << ", " << NewSlices.back().endOffset()
4801 << "): " << *PStore << "\n");
4802 if (!SplitLoads) {
4803 LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
4804 }
4805
4806 // See if we've finished all the splits.
4807 if (Idx >= Size)
4808 break;
4809
4810 // Setup the next partition.
4811 PartOffset = Offsets.Splits[Idx];
4812 ++Idx;
4813 PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
4814 }
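    // Illustrative sketch: for a store of an i64 load with Splits == {4}, the
    // loop above produces two i32 pieces, each either taken from the
    // pre-split loads or loaded at its byte offset from the load's base
    // pointer, and then stored at the same relative offset from the store's
    // base pointer.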
4815
4816 // We want to immediately iterate on any allocas impacted by splitting
4817 // this load, which is only relevant if it isn't a load of this alloca and
4818 // thus we didn't already split the loads above. We also have to keep track
4819 // of any promotable allocas we split loads on as they can no longer be
4820 // promoted.
4821 if (!SplitLoads) {
4822 if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
4823 assert(OtherAI != &AI && "We can't re-split our own alloca!");
4824 ResplitPromotableAllocas.insert(OtherAI);
4825 Worklist.insert(OtherAI);
4826 } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4827 LoadBasePtr->stripInBoundsOffsets())) {
4828 assert(OtherAI != &AI && "We can't re-split our own alloca!");
4829 Worklist.insert(OtherAI);
4830 }
4831 }
4832
4833 // Mark the original store as dead now that we've split it up and kill its
4834 // slice. Note that we leave the original load in place unless this store
4835 // was its only use. It may in turn be split up if it is an alloca load
4836 // for some other alloca, but it may be a normal load. This may introduce
4837 // redundant loads, but where those can be merged the rest of the optimizer
4838 // should handle the merging, and this uncovers SSA splits which is more
4839 // important. In practice, the original loads will almost always be fully
4840 // split and removed eventually, and the splits will be merged by any
4841 // trivial CSE, including instcombine.
4842 if (LI->hasOneUse()) {
4843 assert(*LI->user_begin() == SI && "Single use isn't this store!");
4844 DeadInsts.push_back(LI);
4845 }
4846 DeadInsts.push_back(SI);
4847 Offsets.S->kill();
4848 }
4849
4850   // Remove the killed slices that have been pre-split.
4851 llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });
4852
4853 // Insert our new slices. This will sort and merge them into the sorted
4854 // sequence.
4855 AS.insert(NewSlices);
4856
4857 LLVM_DEBUG(dbgs() << " Pre-split slices:\n");
4858#ifndef NDEBUG
4859 for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
4860 LLVM_DEBUG(AS.print(dbgs(), I, " "));
4861#endif
4862
4863   // Finally, don't try to promote any allocas that now require re-splitting.
4864 // They have already been added to the worklist above.
4865 PromotableAllocas.set_subtract(ResplitPromotableAllocas);
4866
4867 return true;
4868}
4869
4870/// Rewrite an alloca partition's users.
4871///
4872/// This routine drives both of the rewriting goals of the SROA pass. It tries
4873/// to rewrite uses of an alloca partition to be conducive for SSA value
4874/// promotion. If the partition needs a new, more refined alloca, this will
4875/// build that new alloca, preserving as much type information as possible, and
4876/// rewrite the uses of the old alloca to point at the new one and have the
4877/// appropriate new offsets. It also evaluates how successful the rewrite was
4878/// at enabling promotion and if it was successful queues the alloca to be
4879/// promoted.
4880AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
4881 Partition &P) {
4882 // Try to compute a friendly type for this partition of the alloca. This
4883 // won't always succeed, in which case we fall back to a legal integer type
4884 // or an i8 array of an appropriate size.
4885 Type *SliceTy = nullptr;
4886 VectorType *SliceVecTy = nullptr;
4887 const DataLayout &DL = AI.getDataLayout();
4888 unsigned VScale = AI.getFunction()->getVScaleValue();
4889
4890 std::pair<Type *, IntegerType *> CommonUseTy =
4891 findCommonType(P.begin(), P.end(), P.endOffset());
4892 // Do all uses operate on the same type?
4893 if (CommonUseTy.first) {
4894 TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first);
4895 if (CommonUseSize.isFixed() && CommonUseSize.getFixedValue() >= P.size()) {
4896 SliceTy = CommonUseTy.first;
4897 SliceVecTy = dyn_cast<VectorType>(SliceTy);
4898 }
4899 }
4900 // If not, can we find an appropriate subtype in the original allocated type?
4901 if (!SliceTy)
4902 if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
4903 P.beginOffset(), P.size()))
4904 SliceTy = TypePartitionTy;
4905
4906 // If still not, can we use the largest bitwidth integer type used?
4907 if (!SliceTy && CommonUseTy.second)
4908 if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size()) {
4909 SliceTy = CommonUseTy.second;
4910 SliceVecTy = dyn_cast<VectorType>(SliceTy);
4911 }
4912 if ((!SliceTy || (SliceTy->isArrayTy() &&
4913 SliceTy->getArrayElementType()->isIntegerTy())) &&
4914 DL.isLegalInteger(P.size() * 8)) {
4915 SliceTy = Type::getIntNTy(*C, P.size() * 8);
4916 }
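  // For example, a 4-byte partition whose uses all operate on i32 keeps i32
  // as the slice type; failing a common use type or a matching subtype of the
  // original allocated type, a partition whose size is a legal integer width
  // falls back to iN, and anything else becomes an [N x i8] array below.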
4917
4918 // If the common use types are not viable for promotion then attempt to find
4919 // another type that is viable.
4920 if (SliceVecTy && !checkVectorTypeForPromotion(P, SliceVecTy, DL, VScale))
4921 if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
4922 P.beginOffset(), P.size())) {
4923 VectorType *TypePartitionVecTy = dyn_cast<VectorType>(TypePartitionTy);
4924 if (TypePartitionVecTy &&
4925 checkVectorTypeForPromotion(P, TypePartitionVecTy, DL, VScale))
4926 SliceTy = TypePartitionTy;
4927 }
4928
4929 if (!SliceTy)
4930 SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
4931 assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size());
4932
4933 bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
4934
4935 VectorType *VecTy =
4936 IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL, VScale);
4937 if (VecTy)
4938 SliceTy = VecTy;
4939
4940 // Check for the case where we're going to rewrite to a new alloca of the
4941 // exact same type as the original, and with the same access offsets. In that
4942 // case, re-use the existing alloca, but still run through the rewriter to
4943 // perform phi and select speculation.
4944 // P.beginOffset() can be non-zero even with the same type in a case with
4945 // out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll).
4946 AllocaInst *NewAI;
4947 if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
4948 NewAI = &AI;
4949 // FIXME: We should be able to bail at this point with "nothing changed".
4950 // FIXME: We might want to defer PHI speculation until after here.
4951 // FIXME: return nullptr;
4952 } else {
4953 // Make sure the alignment is compatible with P.beginOffset().
4954 const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset());
4955 // If we will get at least this much alignment from the type alone, leave
4956 // the alloca's alignment unconstrained.
4957 const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
4958 NewAI = new AllocaInst(
4959 SliceTy, AI.getAddressSpace(), nullptr,
4960 IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
4961 AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),
4962 AI.getIterator());
4963 // Copy the old AI debug location over to the new one.
4964 NewAI->setDebugLoc(AI.getDebugLoc());
4965 ++NumNewAllocas;
4966 }
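  // The replacement alloca is named after the original with a ".sroa.<N>"
  // suffix, where N is the index of the partition's first slice, so %x may
  // give rise to %x.sroa.0, %x.sroa.3, and so on.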
4967
4968 LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset()
4969 << "," << P.endOffset() << ") to: " << *NewAI << "\n");
4970
4971 // Track the high watermark on the worklist as it is only relevant for
4972 // promoted allocas. We will reset it to this point if the alloca is not in
4973 // fact scheduled for promotion.
4974 unsigned PPWOldSize = PostPromotionWorklist.size();
4975 unsigned NumUses = 0;
4976   SmallSetVector<PHINode *, 8> PHIUsers;
4977   SmallSetVector<SelectInst *, 8> SelectUsers;
4978
4979 AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
4980 P.endOffset(), IsIntegerPromotable, VecTy,
4981 PHIUsers, SelectUsers);
4982 bool Promotable = true;
4983 for (Slice *S : P.splitSliceTails()) {
4984 Promotable &= Rewriter.visit(S);
4985 ++NumUses;
4986 }
4987 for (Slice &S : P) {
4988 Promotable &= Rewriter.visit(&S);
4989 ++NumUses;
4990 }
4991
4992 NumAllocaPartitionUses += NumUses;
4993 MaxUsesPerAllocaPartition.updateMax(NumUses);
4994
4995 // Now that we've processed all the slices in the new partition, check if any
4996 // PHIs or Selects would block promotion.
4997 for (PHINode *PHI : PHIUsers)
4998 if (!isSafePHIToSpeculate(*PHI)) {
4999 Promotable = false;
5000 PHIUsers.clear();
5001 SelectUsers.clear();
5002 break;
5003 }
5004
5005   SmallVector<std::pair<SelectInst *, RewriteableMemOps>, 2>
5006       NewSelectsToRewrite;
5007 NewSelectsToRewrite.reserve(SelectUsers.size());
5008 for (SelectInst *Sel : SelectUsers) {
5009 std::optional<RewriteableMemOps> Ops =
5010 isSafeSelectToSpeculate(*Sel, PreserveCFG);
5011 if (!Ops) {
5012 Promotable = false;
5013 PHIUsers.clear();
5014 SelectUsers.clear();
5015 NewSelectsToRewrite.clear();
5016 break;
5017 }
5018 NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));
5019 }
5020
5021 if (Promotable) {
5022 for (Use *U : AS.getDeadUsesIfPromotable()) {
5023       auto *OldInst = dyn_cast<Instruction>(U->get());
5024       Value::dropDroppableUse(*U);
5025 if (OldInst)
5026 if (isInstructionTriviallyDead(OldInst))
5027 DeadInsts.push_back(OldInst);
5028 }
5029 if (PHIUsers.empty() && SelectUsers.empty()) {
5030 // Promote the alloca.
5031 PromotableAllocas.insert(NewAI);
5032 } else {
5033 // If we have either PHIs or Selects to speculate, add them to those
5034     // worklists and re-queue the new alloca so that we promote it on the
5035 // next iteration.
5036 SpeculatablePHIs.insert_range(PHIUsers);
5037 SelectsToRewrite.reserve(SelectsToRewrite.size() +
5038 NewSelectsToRewrite.size());
5039 for (auto &&KV : llvm::make_range(
5040 std::make_move_iterator(NewSelectsToRewrite.begin()),
5041 std::make_move_iterator(NewSelectsToRewrite.end())))
5042 SelectsToRewrite.insert(std::move(KV));
5043 Worklist.insert(NewAI);
5044 }
5045 } else {
5046 // Drop any post-promotion work items if promotion didn't happen.
5047 while (PostPromotionWorklist.size() > PPWOldSize)
5048 PostPromotionWorklist.pop_back();
5049
5050 // We couldn't promote and we didn't create a new partition, nothing
5051 // happened.
5052 if (NewAI == &AI)
5053 return nullptr;
5054
5055 // If we can't promote the alloca, iterate on it to check for new
5056 // refinements exposed by splitting the current alloca. Don't iterate on an
5057 // alloca which didn't actually change and didn't get promoted.
5058 Worklist.insert(NewAI);
5059 }
5060
5061 return NewAI;
5062}
5063
5064// There isn't a shared interface to get the "address" parts out of a
5065// dbg.declare and dbg.assign, so provide some wrappers.
5066 static bool isKillAddress(const DbgVariableRecord *DVR) {
5067   if (DVR->getType() == DbgVariableRecord::LocationType::Assign)
5068 return DVR->isKillAddress();
5069 return DVR->isKillLocation();
5070}
5071
5072 static const DIExpression *getAddressExpression(const DbgVariableRecord *DVR) {
5073   if (DVR->getType() == DbgVariableRecord::LocationType::Assign)
5074 return DVR->getAddressExpression();
5075 return DVR->getExpression();
5076}
5077
5078/// Create or replace an existing fragment in a DIExpression with \p Frag.
5079/// If the expression already contains a DW_OP_LLVM_extract_bits_[sz]ext
5080/// operation, add \p BitExtractOffset to the offset part.
5081///
5082/// Returns the new expression, or nullptr if this fails (see details below).
5083///
5084/// This function is similar to DIExpression::createFragmentExpression except
5085/// for 3 important distinctions:
5086/// 1. The new fragment isn't relative to an existing fragment.
5087/// 2. It assumes the computed location is a memory location. This means we
5088/// don't need to perform checks that creating the fragment preserves the
5089/// expression semantics.
5090/// 3. Existing extract_bits are modified independently of fragment changes
5091/// using \p BitExtractOffset. A change to the fragment offset or size
5092/// may affect a bit extract. But a bit extract offset can change
5093/// independently of the fragment dimensions.
5094///
5095/// Returns the new expression, or nullptr if one couldn't be created.
5096/// Ideally this is only used to signal that a bit-extract has become
5097/// zero-sized (and thus the new debug record has no size and can be
5098/// dropped), however, it fails for other reasons too - see the FIXME below.
5099///
5100/// FIXME: To keep the change that introduces this function NFC it bails
5101 /// in some situations unnecessarily, e.g. when fragment and bit extract
5102/// sizes differ.
5103 static DIExpression *createOrReplaceFragment(const DIExpression *Expr,
5104                                              DIExpression::FragmentInfo Frag,
5105                                              int64_t BitExtractOffset) {
5106   SmallVector<uint64_t, 8> Ops;
5107   bool HasFragment = false;
5108 bool HasBitExtract = false;
5109
5110 for (auto &Op : Expr->expr_ops()) {
5111 if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) {
5112 HasFragment = true;
5113 continue;
5114 }
5115 if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
5116         Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_sext) {
5117       HasBitExtract = true;
5118 int64_t ExtractOffsetInBits = Op.getArg(0);
5119 int64_t ExtractSizeInBits = Op.getArg(1);
5120
5121 // DIExpression::createFragmentExpression doesn't know how to handle
5122 // a fragment that is smaller than the extract. Copy the behaviour
5123 // (bail) to avoid non-NFC changes.
5124 // FIXME: Don't do this.
5125 if (Frag.SizeInBits < uint64_t(ExtractSizeInBits))
5126 return nullptr;
5127
5128 assert(BitExtractOffset <= 0);
5129 int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
5130
5131 // DIExpression::createFragmentExpression doesn't know what to do
5132 // if the new extract starts "outside" the existing one. Copy the
5133 // behaviour (bail) to avoid non-NFC changes.
5134 // FIXME: Don't do this.
5135 if (AdjustedOffset < 0)
5136 return nullptr;
5137
5138 Ops.push_back(Op.getOp());
5139 Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
5140 Ops.push_back(ExtractSizeInBits);
5141 continue;
5142 }
5143 Op.appendToVector(Ops);
5144 }
5145
5146 // Unsupported by createFragmentExpression, so don't support it here yet to
5147 // preserve NFC-ness.
5148 if (HasFragment && HasBitExtract)
5149 return nullptr;
5150
5151 if (!HasBitExtract) {
5152     Ops.push_back(dwarf::DW_OP_LLVM_fragment);
5153     Ops.push_back(Frag.OffsetInBits);
5154 Ops.push_back(Frag.SizeInBits);
5155 }
5156 return DIExpression::get(Expr->getContext(), Ops);
5157}
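// For example, with Frag = (offset 0, size 32) and an incoming
// DW_OP_LLVM_extract_bits_sext at offset 16 of size 8, a BitExtractOffset of
// -16 rewrites the extract to offset 0 with the same size; since the
// expression already contains a bit extract, no DW_OP_LLVM_fragment is
// appended.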
5158
5159/// Insert a new DbgRecord.
5160/// \p Orig Original to copy record type, debug loc and variable from, and
5161/// additionally value and value expression for dbg_assign records.
5162/// \p NewAddr Location's new base address.
5163/// \p NewAddrExpr New expression to apply to address.
5164/// \p BeforeInst Insert position.
5165/// \p NewFragment New fragment (absolute, non-relative).
5166/// \p BitExtractAdjustment Offset to apply to any extract_bits op.
5167static void
5168 insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig, AllocaInst *NewAddr,
5169                  DIExpression *NewAddrExpr, Instruction *BeforeInst,
5170 std::optional<DIExpression::FragmentInfo> NewFragment,
5171 int64_t BitExtractAdjustment) {
5172 (void)DIB;
5173
5174 // A dbg_assign puts fragment info in the value expression only. The address
5175 // expression has already been built: NewAddrExpr. A dbg_declare puts the
5176 // new fragment info into NewAddrExpr (as it only has one expression).
5177 DIExpression *NewFragmentExpr =
5178 Orig->isDbgAssign() ? Orig->getExpression() : NewAddrExpr;
5179 if (NewFragment)
5180 NewFragmentExpr = createOrReplaceFragment(NewFragmentExpr, *NewFragment,
5181 BitExtractAdjustment);
5182 if (!NewFragmentExpr)
5183 return;
5184
5185 if (Orig->isDbgDeclare()) {
5187 NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc());
5188 BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5189 BeforeInst->getIterator());
5190 return;
5191 }
5192
5193 if (Orig->isDbgValue()) {
5195 NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc());
5196 // Drop debug information if the expression doesn't start with a
5197 // DW_OP_deref. This is because without a DW_OP_deref, the #dbg_value
5198 // describes the address of alloca rather than the value inside the alloca.
5199 if (!NewFragmentExpr->startsWithDeref())
5200 DVR->setKillAddress();
5201 BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5202 BeforeInst->getIterator());
5203 return;
5204 }
5205
5206 // Apply a DIAssignID to the store if it doesn't already have it.
5207 if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
5208     NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
5209                          DIAssignID::getDistinct(NewAddr->getContext()));
5210 }
5211
5213 NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
5214 NewAddrExpr, Orig->getDebugLoc());
5215 LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");
5216 (void)NewAssign;
5217}
5218
5219 /// Walks the slices of an alloca and forms partitions based on them,
5220/// rewriting each of their uses.
5221bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
5222 if (AS.begin() == AS.end())
5223 return false;
5224
5225 unsigned NumPartitions = 0;
5226 bool Changed = false;
5227 const DataLayout &DL = AI.getModule()->getDataLayout();
5228
5229 // First try to pre-split loads and stores.
5230 Changed |= presplitLoadsAndStores(AI, AS);
5231
5232 // Now that we have identified any pre-splitting opportunities,
5233 // mark loads and stores unsplittable except for the following case.
5234 // We leave a slice splittable if all other slices are disjoint or fully
5235 // included in the slice, such as whole-alloca loads and stores.
5236 // If we fail to split these during pre-splitting, we want to force them
5237 // to be rewritten into a partition.
5238 bool IsSorted = true;
5239
5240 uint64_t AllocaSize =
5241 DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();
5242 const uint64_t MaxBitVectorSize = 1024;
5243 if (AllocaSize <= MaxBitVectorSize) {
5244 // If a byte boundary is included in any load or store, a slice starting or
5245 // ending at the boundary is not splittable.
5246 SmallBitVector SplittableOffset(AllocaSize + 1, true);
5247 for (Slice &S : AS)
5248 for (unsigned O = S.beginOffset() + 1;
5249 O < S.endOffset() && O < AllocaSize; O++)
5250 SplittableOffset.reset(O);
5251
5252 for (Slice &S : AS) {
5253 if (!S.isSplittable())
5254 continue;
5255
5256 if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
5257 (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
5258 continue;
5259
5260 if (isa<LoadInst>(S.getUse()->getUser()) ||
5261 isa<StoreInst>(S.getUse()->getUser())) {
5262 S.makeUnsplittable();
5263 IsSorted = false;
5264 }
5265 }
5266 } else {
5267 // We only allow whole-alloca splittable loads and stores
5268 // for a large alloca to avoid creating too large BitVector.
5269 for (Slice &S : AS) {
5270 if (!S.isSplittable())
5271 continue;
5272
5273 if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
5274 continue;
5275
5276 if (isa<LoadInst>(S.getUse()->getUser()) ||
5277 isa<StoreInst>(S.getUse()->getUser())) {
5278 S.makeUnsplittable();
5279 IsSorted = false;
5280 }
5281 }
5282 }
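  // For example, a store covering [0, 4) clears the splittable bits for the
  // interior offsets 1, 2 and 3, so any splittable load or store slice that
  // would have to start or end at one of those offsets is made unsplittable
  // and will be rewritten whole within its partition.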
5283
5284 if (!IsSorted)
5285     llvm::stable_sort(AS);
5286
5287 /// Describes the allocas introduced by rewritePartition in order to migrate
5288 /// the debug info.
5289 struct Fragment {
5290 AllocaInst *Alloca;
5291     uint64_t Offset;
5292     uint64_t Size;
5293 Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
5294 : Alloca(AI), Offset(O), Size(S) {}
5295 };
5296 SmallVector<Fragment, 4> Fragments;
5297
5298 // Rewrite each partition.
5299 for (auto &P : AS.partitions()) {
5300 if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
5301 Changed = true;
5302 if (NewAI != &AI) {
5303 uint64_t SizeOfByte = 8;
5304 uint64_t AllocaSize =
5305 DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue();
5306 // Don't include any padding.
5307 uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
5308 Fragments.push_back(
5309 Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
5310 }
5311 }
5312 ++NumPartitions;
5313 }
5314
5315 NumAllocaPartitions += NumPartitions;
5316 MaxPartitionsPerAlloca.updateMax(NumPartitions);
5317
5318 // Migrate debug information from the old alloca to the new alloca(s)
5319 // and the individual partitions.
5320 auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
5321 // Can't overlap with undef memory.
5322     if (isKillAddress(DbgVariable))
5323       return;
5324
5325 const Value *DbgPtr = DbgVariable->getAddress();
5326     DIExpression::FragmentInfo VarFrag =
5327         DbgVariable->getFragmentOrEntireVariable();
5328 // Get the address expression constant offset if one exists and the ops
5329 // that come after it.
5330 int64_t CurrentExprOffsetInBytes = 0;
5331 SmallVector<uint64_t> PostOffsetOps;
5332     if (!getAddressExpression(DbgVariable)
5333              ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
5334 return; // Couldn't interpret this DIExpression - drop the var.
5335
5336 // Offset defined by a DW_OP_LLVM_extract_bits_[sz]ext.
5337 int64_t ExtractOffsetInBits = 0;
5338 for (auto Op : getAddressExpression(DbgVariable)->expr_ops()) {
5339 if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
5340           Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_sext) {
5341         ExtractOffsetInBits = Op.getArg(0);
5342 break;
5343 }
5344 }
5345
5346 DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
5347 for (auto Fragment : Fragments) {
5348 int64_t OffsetFromLocationInBits;
5349 std::optional<DIExpression::FragmentInfo> NewDbgFragment;
5350 // Find the variable fragment that the new alloca slice covers.
5351 // Drop debug info for this variable fragment if we can't compute an
5352 // intersect between it and the alloca slice.
5354 DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
5355 CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
5356 NewDbgFragment, OffsetFromLocationInBits))
5357 continue; // Do not migrate this fragment to this slice.
5358
5359 // Zero sized fragment indicates there's no intersect between the variable
5360 // fragment and the alloca slice. Skip this slice for this variable
5361 // fragment.
5362 if (NewDbgFragment && !NewDbgFragment->SizeInBits)
5363 continue; // Do not migrate this fragment to this slice.
5364
5365 // No fragment indicates DbgVariable's variable or fragment exactly
5366 // overlaps the slice; copy its fragment (or nullopt if there isn't one).
5367 if (!NewDbgFragment)
5368 NewDbgFragment = DbgVariable->getFragment();
5369
5370 // Reduce the new expression offset by the bit-extract offset since
5371 // we'll be keeping that.
5372 int64_t OffestFromNewAllocaInBits =
5373 OffsetFromLocationInBits - ExtractOffsetInBits;
5374 // We need to adjust an existing bit extract if the offset expression
5375 // can't eat the slack (i.e., if the new offset would be negative).
5376 int64_t BitExtractOffset =
5377 std::min<int64_t>(0, OffestFromNewAllocaInBits);
5378 // The magnitude of a negative value indicates the number of bits into
5379 // the existing variable fragment that the memory region begins. The new
5380 // variable fragment already excludes those bits - the new DbgPtr offset
5381 // only needs to be applied if it's positive.
5382 OffestFromNewAllocaInBits =
5383 std::max(int64_t(0), OffestFromNewAllocaInBits);
5384
5385 // Rebuild the expression:
5386 // {Offset(OffestFromNewAllocaInBits), PostOffsetOps, NewDbgFragment}
5387 // Add NewDbgFragment later, because dbg.assigns don't want it in the
5388 // address expression but the value expression instead.
5389 DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
5390 if (OffestFromNewAllocaInBits > 0) {
5391 int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;
5392 NewExpr = DIExpression::prepend(NewExpr, /*flags=*/0, OffsetInBytes);
5393 }
5394
5395 // Remove any existing intrinsics on the new alloca describing
5396 // the variable fragment.
5397 auto RemoveOne = [DbgVariable](auto *OldDII) {
5398 auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
5399 return LHS->getVariable() == RHS->getVariable() &&
5400 LHS->getDebugLoc()->getInlinedAt() ==
5401 RHS->getDebugLoc()->getInlinedAt();
5402 };
5403 if (SameVariableFragment(OldDII, DbgVariable))
5404 OldDII->eraseFromParent();
5405 };
5406 for_each(findDVRDeclares(Fragment.Alloca), RemoveOne);
5407 for_each(findDVRValues(Fragment.Alloca), RemoveOne);
5408 insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, NewExpr, &AI,
5409 NewDbgFragment, BitExtractOffset);
5410 }
5411 };
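  // Roughly: if an 8-byte alloca described by a single dbg_declare is split
  // into two 4-byte slices, MigrateOne re-issues the declare on each new
  // alloca with fragments covering bits [0, 32) and [32, 64), assuming the
  // variable exactly covered the original alloca and carried no pre-existing
  // fragment or bit extract.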
5412
5413 // Migrate debug information from the old alloca to the new alloca(s)
5414 // and the individual partitions.
5415 for_each(findDVRDeclares(&AI), MigrateOne);
5416 for_each(findDVRValues(&AI), MigrateOne);
5417 for_each(at::getDVRAssignmentMarkers(&AI), MigrateOne);
5418
5419 return Changed;
5420}
5421
5422/// Clobber a use with poison, deleting the used value if it becomes dead.
5423void SROA::clobberUse(Use &U) {
5424 Value *OldV = U;
5425   // Replace the use with a poison value.
5426 U = PoisonValue::get(OldV->getType());
5427
5428 // Check for this making an instruction dead. We have to garbage collect
5429 // all the dead instructions to ensure the uses of any alloca end up being
5430 // minimal.
5431 if (Instruction *OldI = dyn_cast<Instruction>(OldV))
5432 if (isInstructionTriviallyDead(OldI)) {
5433 DeadInsts.push_back(OldI);
5434 }
5435}
5436
5437/// A basic LoadAndStorePromoter that does not remove store nodes.
5438 class BasicLoadAndStorePromoter : public LoadAndStorePromoter {
5439 public:
5440   BasicLoadAndStorePromoter(ArrayRef<const Instruction *> Insts, SSAUpdater &S,
5441                             Type *ZeroType)
5442 : LoadAndStorePromoter(Insts, S), ZeroType(ZeroType) {}
5443 bool shouldDelete(Instruction *I) const override {
5444 return !isa<StoreInst>(I) && !isa<AllocaInst>(I);
5445 }
5446
5447   Value *getValueToUseForAlloca(Instruction *I) const override {
5448     return UndefValue::get(ZeroType);
5449 }
5450
5451private:
5452 Type *ZeroType;
5453};
5454
5455bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
5456 // Look through each "partition", looking for slices with the same start/end
5457 // that do not overlap with any before them. The slices are sorted by
5458 // increasing beginOffset. We don't use AS.partitions(), as it will use a more
5459 // sophisticated algorithm that takes splittable slices into account.
5460 LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
5461 bool AllSameAndValid = true;
5462 Type *PartitionType = nullptr;
5463   SmallVector<Instruction *, 4> Insts;
5464   uint64_t BeginOffset = 0;
5465 uint64_t EndOffset = 0;
5466
5467 auto Flush = [&]() {
5468 if (AllSameAndValid && !Insts.empty()) {
5469 LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
5470 << EndOffset << ")\n");
5471       SmallVector<PHINode *, 4> NewPHIs;
5472       SSAUpdater SSA(&NewPHIs);
5473 Insts.push_back(&AI);
5474 BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
5475 Promoter.run(Insts);
5476 }
5477 AllSameAndValid = true;
5478 PartitionType = nullptr;
5479 Insts.clear();
5480 };
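  // A group here is a maximal run of slices that share the same [begin, end)
  // range and do not overlap anything before them; e.g. several i32 loads and
  // stores all covering [4, 8) form one group handed to the promoter, while
  // an overlapping [6, 10) access invalidates the group.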
5481
5482 for (Slice &S : AS) {
5483 auto *User = cast<Instruction>(S.getUse()->getUser());
5485 LLVM_DEBUG({
5486 dbgs() << "Ignoring slice: ";
5487 AS.print(dbgs(), &S);
5488 });
5489 continue;
5490 }
5491 if (S.beginOffset() >= EndOffset) {
5492 Flush();
5493 BeginOffset = S.beginOffset();
5494 EndOffset = S.endOffset();
5495 } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
5496 if (AllSameAndValid) {
5497 LLVM_DEBUG({
5498 dbgs() << "Slice does not match range [" << BeginOffset << ", "
5499 << EndOffset << ")";
5500 AS.print(dbgs(), &S);
5501 });
5502 AllSameAndValid = false;
5503 }
5504 EndOffset = std::max(EndOffset, S.endOffset());
5505 continue;
5506 }
5507
5508 if (auto *LI = dyn_cast<LoadInst>(User)) {
5509 Type *UserTy = LI->getType();
5510 // LoadAndStorePromoter requires all the types to be the same.
5511 if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
5512 AllSameAndValid = false;
5513 PartitionType = UserTy;
5514 Insts.push_back(User);
5515 } else if (auto *SI = dyn_cast<StoreInst>(User)) {
5516 Type *UserTy = SI->getValueOperand()->getType();
5517 if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
5518 AllSameAndValid = false;
5519 PartitionType = UserTy;
5520 Insts.push_back(User);
5521 } else {
5522 AllSameAndValid = false;
5523 }
5524 }
5525
5526 Flush();
5527 return true;
5528}
5529
5530/// Analyze an alloca for SROA.
5531///
5532/// This analyzes the alloca to ensure we can reason about it, builds
5533/// the slices of the alloca, and then hands it off to be split and
5534/// rewritten as needed.
5535std::pair<bool /*Changed*/, bool /*CFGChanged*/>
5536SROA::runOnAlloca(AllocaInst &AI) {
5537 bool Changed = false;
5538 bool CFGChanged = false;
5539
5540 LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
5541 ++NumAllocasAnalyzed;
5542
5543 // Special case dead allocas, as they're trivial.
5544 if (AI.use_empty()) {
5545 AI.eraseFromParent();
5546 Changed = true;
5547 return {Changed, CFGChanged};
5548 }
5549 const DataLayout &DL = AI.getDataLayout();
5550
5551 // Skip alloca forms that this analysis can't handle.
5552 auto *AT = AI.getAllocatedType();
5553 TypeSize Size = DL.getTypeAllocSize(AT);
5554 if (AI.isArrayAllocation() || !AT->isSized() || Size.isScalable() ||
5555 Size.getFixedValue() == 0)
5556 return {Changed, CFGChanged};
5557
5558 // First, split any FCA loads and stores touching this alloca to promote
5559 // better splitting and promotion opportunities.
5560 IRBuilderTy IRB(&AI);
5561 AggLoadStoreRewriter AggRewriter(DL, IRB);
5562 Changed |= AggRewriter.rewrite(AI);
5563
5564 // Build the slices using a recursive instruction-visiting builder.
5565 AllocaSlices AS(DL, AI);
5566 LLVM_DEBUG(AS.print(dbgs()));
5567 if (AS.isEscaped())
5568 return {Changed, CFGChanged};
5569
5570 if (AS.isEscapedReadOnly()) {
5571 Changed |= propagateStoredValuesToLoads(AI, AS);
5572 return {Changed, CFGChanged};
5573 }
5574
5575 // Delete all the dead users of this alloca before splitting and rewriting it.
5576 for (Instruction *DeadUser : AS.getDeadUsers()) {
5577 // Free up everything used by this instruction.
5578 for (Use &DeadOp : DeadUser->operands())
5579 clobberUse(DeadOp);
5580
5581 // Now replace the uses of this instruction.
5582 DeadUser->replaceAllUsesWith(PoisonValue::get(DeadUser->getType()));
5583
5584 // And mark it for deletion.
5585 DeadInsts.push_back(DeadUser);
5586 Changed = true;
5587 }
5588 for (Use *DeadOp : AS.getDeadOperands()) {
5589 clobberUse(*DeadOp);
5590 Changed = true;
5591 }
5592
5593 // No slices to split. Leave the dead alloca for a later pass to clean up.
5594 if (AS.begin() == AS.end())
5595 return {Changed, CFGChanged};
5596
5597 Changed |= splitAlloca(AI, AS);
5598
5599 LLVM_DEBUG(dbgs() << " Speculating PHIs\n");
5600 while (!SpeculatablePHIs.empty())
5601 speculatePHINodeLoads(IRB, *SpeculatablePHIs.pop_back_val());
5602
5603 LLVM_DEBUG(dbgs() << " Rewriting Selects\n");
5604 auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
5605 while (!RemainingSelectsToRewrite.empty()) {
5606 const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
5607 CFGChanged |=
5608 rewriteSelectInstMemOps(*K, V, IRB, PreserveCFG ? nullptr : DTU);
5609 }
5610
5611 return {Changed, CFGChanged};
5612}
5613
5614/// Delete the dead instructions accumulated in this run.
5615///
5616/// Recursively deletes the dead instructions we've accumulated. This is done
5617/// at the very end to maximize locality of the recursive delete and to
5618/// minimize the problems of invalidated instruction pointers as such pointers
5619/// are used heavily in the intermediate stages of the algorithm.
5620///
5621/// We also record the alloca instructions deleted here so that they aren't
5622/// subsequently handed to mem2reg to promote.
5623bool SROA::deleteDeadInstructions(
5624 SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
5625 bool Changed = false;
5626 while (!DeadInsts.empty()) {
5627 Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
5628 if (!I)
5629 continue;
5630 LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
5631
5632 // If the instruction is an alloca, find the possible dbg.declare connected
5633 // to it, and remove it too. We must do this before calling RAUW or we will
5634 // not be able to find it.
5635 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
5636 DeletedAllocas.insert(AI);
5637 for (DbgVariableRecord *OldDII : findDVRDeclares(AI))
5638 OldDII->eraseFromParent();
5639 }
5640
5642 I->replaceAllUsesWith(UndefValue::get(I->getType()));
5643
5644 for (Use &Operand : I->operands())
5645 if (Instruction *U = dyn_cast<Instruction>(Operand)) {
5646 // Zero out the operand and see if it becomes trivially dead.
5647 Operand = nullptr;
5648         if (isInstructionTriviallyDead(U))
5649           DeadInsts.push_back(U);
5650 }
5651
5652 ++NumDeleted;
5653 I->eraseFromParent();
5654 Changed = true;
5655 }
5656 return Changed;
5657}
5658/// Promote the allocas, using the best available technique.
5659///
5660/// This attempts to promote whatever allocas have been identified as viable in
5661/// the PromotableAllocas list. If that list is empty, there is nothing to do.
5662/// This function returns whether any promotion occurred.
5663bool SROA::promoteAllocas() {
5664 if (PromotableAllocas.empty())
5665 return false;
5666
5667 if (SROASkipMem2Reg) {
5668 LLVM_DEBUG(dbgs() << "Not promoting allocas with mem2reg!\n");
5669 } else {
5670 LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
5671 NumPromoted += PromotableAllocas.size();
5672 PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
5673 }
5674
5675 PromotableAllocas.clear();
5676 return true;
5677}
5678
5679std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {
5680 LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
5681
5682 const DataLayout &DL = F.getDataLayout();
5683 BasicBlock &EntryBB = F.getEntryBlock();
5684 for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
5685 I != E; ++I) {
5686 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
5687 if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
5688           isAllocaPromotable(AI))
5689         PromotableAllocas.insert(AI);
5690 else
5691 Worklist.insert(AI);
5692 }
5693 }
5694
5695 bool Changed = false;
5696 bool CFGChanged = false;
5697 // A set of deleted alloca instruction pointers which should be removed from
5698 // the list of promotable allocas.
5699 SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
5700
5701 do {
5702 while (!Worklist.empty()) {
5703 auto [IterationChanged, IterationCFGChanged] =
5704 runOnAlloca(*Worklist.pop_back_val());
5705 Changed |= IterationChanged;
5706 CFGChanged |= IterationCFGChanged;
5707
5708 Changed |= deleteDeadInstructions(DeletedAllocas);
5709
5710 // Remove the deleted allocas from various lists so that we don't try to
5711 // continue processing them.
5712 if (!DeletedAllocas.empty()) {
5713 Worklist.set_subtract(DeletedAllocas);
5714 PostPromotionWorklist.set_subtract(DeletedAllocas);
5715 PromotableAllocas.set_subtract(DeletedAllocas);
5716 DeletedAllocas.clear();
5717 }
5718 }
5719
5720 Changed |= promoteAllocas();
5721
5722 Worklist = PostPromotionWorklist;
5723 PostPromotionWorklist.clear();
5724 } while (!Worklist.empty());
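  // The outer loop re-runs the pipeline on the post-promotion worklist:
  // allocas uncovered or re-split while rewriting others get another chance
  // at analysis and promotion until no work remains.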
5725
5726 assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
5727 assert((!CFGChanged || !PreserveCFG) &&
5728 "Should not have modified the CFG when told to preserve it.");
5729
5730 if (Changed && isAssignmentTrackingEnabled(*F.getParent())) {
5731 for (auto &BB : F) {
5732       RemoveRedundantDbgInstrs(&BB);
5733     }
5734 }
5735
5736 return {Changed, CFGChanged};
5737}
5738
5739 PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
5740   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
5741   AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
5742   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
5743 auto [Changed, CFGChanged] =
5744 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
5745 if (!Changed)
5746 return PreservedAnalyses::all();
5747   PreservedAnalyses PA;
5748   if (!CFGChanged)
5749     PA.preserveSet<CFGAnalyses>();
5750   PA.preserve<DominatorTreeAnalysis>();
5751   return PA;
5752}
5753
5754 void SROAPass::printPipeline(
5755     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
5756 static_cast<PassInfoMixin<SROAPass> *>(this)->printPipeline(
5757 OS, MapClassName2PassName);
5758 OS << (PreserveCFG == SROAOptions::PreserveCFG ? "<preserve-cfg>"
5759 : "<modify-cfg>");
5760}
5761
5763
5764namespace {
5765
5766/// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
5767class SROALegacyPass : public FunctionPass {
5768   SROAOptions PreserveCFG;
5769
5770public:
5771 static char ID;
5772
5773 SROALegacyPass(SROAOptions PreserveCFG = SROAOptions::PreserveCFG)
5774       : FunctionPass(ID), PreserveCFG(PreserveCFG) {
5775     initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
5776   }
5777
5778 bool runOnFunction(Function &F) override {
5779 if (skipFunction(F))
5780 return false;
5781
5782 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5783 AssumptionCache &AC =
5784 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
5785 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
5786 auto [Changed, _] =
5787 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
5788 return Changed;
5789 }
5790
5791 void getAnalysisUsage(AnalysisUsage &AU) const override {
5792     AU.addRequired<AssumptionCacheTracker>();
5793     AU.addRequired<DominatorTreeWrapperPass>();
5794     AU.addPreserved<GlobalsAAWrapperPass>();
5795     AU.setPreservesCFG();
5796   }
5797
5798 StringRef getPassName() const override { return "SROA"; }
5799};
5800
5801} // end anonymous namespace
5802
5803char SROALegacyPass::ID = 0;
5804
5805 FunctionPass *llvm::createSROAPass(bool PreserveCFG) {
5806   return new SROALegacyPass(PreserveCFG ? SROAOptions::PreserveCFG
5807                                         : SROAOptions::ModifyCFG);
5808 }
5809
5810INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
5811 "Scalar Replacement Of Aggregates", false, false)
5812 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
5813 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
5814 INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
5815                     false, false)
Definition: APInt.h:1182
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
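A minimal sketch of the unsigned APInt queries indexed above; the helper name apintOffsetExample is hypothetical and not part of this file:

#include "llvm/ADT/APInt.h"
using namespace llvm;

void apintOffsetExample() {
  APInt Off(/*numBits=*/64, /*val=*/40);
  uint64_t Raw = Off.getZExtValue();          // 40
  uint64_t Clamped = Off.getLimitedValue(32); // 32: value exceeds the limit
  bool Past = Off.ugt(APInt(64, 32));         // true: unsigned comparison
  (void)Raw; (void)Clamped; (void)Past;
}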
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Definition: Instructions.h:64
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:128
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:101
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:121
unsigned getAddressSpace() const
Return the address space for the allocation.
Definition: Instructions.h:106
LLVM_ABI bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
iterator begin() const
Definition: ArrayRef.h:135
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator end()
Definition: BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
This class represents a no-op cast from one type to another.
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:73
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1116
LLVM_ABI CaptureInfo getCaptureInfo(unsigned OpNo) const
Return which pointer components this operand may capture.
bool onlyReadsMemory(unsigned OpNo) const
Definition: InstrTypes.h:1751
bool isDataOperand(const Use *U) const
Definition: InstrTypes.h:1244
This class represents a function call, abstracting a target machine's calling convention.
ConstantFolder - Create constants with minimum, target independent, folding.
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1423
This is an important base class in LLVM.
Definition: Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
Assignment ID.
static DIAssignID * getDistinct(LLVMContext &Context)
LLVM_ABI DbgInstPtr insertDbgAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *SrcVar, DIExpression *ValExpr, Value *Addr, DIExpression *AddrExpr, const DILocation *DL)
Insert a new llvm.dbg.assign intrinsic call.
Definition: DIBuilder.cpp:1086
DWARF expression.
iterator_range< expr_op_iterator > expr_ops() const
DbgVariableFragmentInfo FragmentInfo
LLVM_ABI bool startsWithDeref() const
Return whether the first element is a DW_OP_deref.
static LLVM_ABI bool calculateFragmentIntersect(const DataLayout &DL, const Value *SliceStart, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const Value *DbgPtr, int64_t DbgPtrOffsetInBits, int64_t DbgExtractOffsetInBits, DIExpression::FragmentInfo VarFrag, std::optional< DIExpression::FragmentInfo > &Result, int64_t &OffsetFromLocationInBits)
Computes a fragment, bit-extract operation if needed, and new constant offset to describe a part of a...
static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
static LLVM_ABI DIExpression * prepend(const DIExpression *Expr, uint8_t Flags, int64_t Offset=0)
Prepend DIExpr with a deref and offset operation and optionally turn it into a stack value and/or an ...
LLVM_ABI bool extractLeadingOffset(int64_t &OffsetInBytes, SmallVectorImpl< uint64_t > &RemainingOps) const
Assuming that the expression operates on an address, extract a constant offset and the successive ops...
LLVM_ABI std::optional< uint64_t > getSizeInBits() const
Determines the size of the variable's type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool typeSizeEqualsStoreSize(Type *Ty) const
Returns true if no extra padding bits are needed when storing the specified type.
Definition: DataLayout.h:492
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:468
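A hedged sketch of the store-size queries listed above; hasExactStoreSize is a hypothetical helper, not an API of this file:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// True when loading/storing Ty touches no padding bits, e.g. i32 (32 bits in
// a 4-byte store) but not i1 (1 bit padded out to a full byte).
bool hasExactStoreSize(Type *Ty, const DataLayout &DL) {
  TypeSize StoreBytes = DL.getTypeStoreSize(Ty); // maximum bytes overwritten
  (void)StoreBytes;
  return DL.typeSizeEqualsStoreSize(Ty);
}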
DebugLoc getDebugLoc() const
LLVM_ABI void moveBefore(DbgRecord *MoveBefore)
void setDebugLoc(DebugLoc Loc)
Record of a variable value-assignment, aka a non-instruction representation of the dbg....
LLVM_ABI bool isKillAddress() const
Check whether this kills the address component.
LLVM_ABI bool isKillLocation() const
Value * getValue(unsigned OpIdx=0) const
static LLVM_ABI DbgVariableRecord * createDbgVariableRecord(Value *Location, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
static LLVM_ABI DbgVariableRecord * createDVRDeclare(Value *Address, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
DIExpression * getExpression() const
LLVM_ABI void setKillAddress()
Kill the address component.
DILocalVariable * getVariable() const
static LLVM_ABI DbgVariableRecord * createLinkedDVRAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *Variable, DIExpression *Expression, Value *Address, DIExpression *AddressExpression, const DILocation *DI)
DIExpression * getAddressExpression() const
This class is used to track local variable information.
Definition: DwarfDebug.h:214
LLVM_ABI DILocation * getInlinedAt() const
Definition: DebugLoc.cpp:69
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:203
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:173
iterator end()
Definition: DenseMap.h:87
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:284
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:322
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:803
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314
unsigned getVScaleValue() const
Return the value for vscale based on the vscale_range attribute or 0 when unknown.
Definition: Function.cpp:1160
const BasicBlock & getEntryBlock() const
Definition: Function.h:807
Represents flags for the getelementptr instruction/expression.
LLVM_ABI bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset, function_ref< bool(Value &, APInt &)> ExternalAnalysis=nullptr) const
Accumulate the constant address offset of this GEP if possible.
Definition: Operator.cpp:113
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:949
iterator_range< op_iterator > indices()
Type * getSourceElementType() const
LLVM_ABI GEPNoWrapFlags getNoWrapFlags() const
Get the nowrap flags for the GEP instruction.
Legacy wrapper pass to provide the GlobalsAAResult object.
This provides the default implementation of the IRBuilder 'InsertHelper' method that is called whenev...
Definition: IRBuilder.h:61
virtual void InsertHelper(Instruction *I, const Twine &Name, BasicBlock::iterator InsertPt) const
Definition: IRBuilder.h:65
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
Base class for instruction visitors.
Definition: InstVisitor.h:78
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:513
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:78
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1804
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
Definition: Instruction.h:406
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:171
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:82
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:428
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1718
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1789
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:510
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:86
Class to represent integer types.
Definition: DerivedTypes.h:42
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:74
@ MAX_INT_BITS
Maximum number of bits that can be specified.
Definition: DerivedTypes.h:54
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:49
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
Helper class for promoting a collection of loads and stores into SSA Form using the SSAUpdater.
Definition: SSAUpdater.h:147
An instruction for reading from memory.
Definition: Instructions.h:180
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:265
void setAlignment(Align Align)
Definition: Instructions.h:219
Value * getPointerOperand()
Definition: Instructions.h:259
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Definition: Instructions.h:209
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Definition: Instructions.h:245
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:224
Type * getPointerOperandType() const
Definition: Instructions.h:262
static unsigned getPointerOperandIndex()
Definition: Instructions.h:261
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Definition: Instructions.h:234
bool isSimple() const
Definition: Instructions.h:251
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:215
LLVMContext & getContext() const
Definition: Metadata.h:1241
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.memcpy/memmove intrinsics.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:278
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:99
PointerIntPair - This class implements a pair of a pointer and small integer.
void setPointer(PointerTy PtrVal) &
IntType getInt() const
void setInt(IntType IntVal) &
PointerTy getPointer() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1885
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition: Analysis.h:132
A base class for visitors over the uses of a pointer value.
void visitGetElementPtrInst(GetElementPtrInst &GEPI)
void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC)
void visitBitCastInst(BitCastInst &BC)
void visitIntrinsicInst(IntrinsicInst &II)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition: SROA.cpp:5739
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition: SROA.cpp:5754
SROAPass(SROAOptions PreserveCFG)
If PreserveCFG is set, then the pass is not allowed to modify CFG in any way, even if it would update...
Definition: SROA.cpp:5762
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:39
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
Definition: SetVector.h:59
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:104
void clear()
Completely clear the SetVector.
Definition: SetVector.h:284
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
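A small sketch of the SetVector insertion semantics listed above; the function name is hypothetical:

#include "llvm/ADT/SetVector.h"
using namespace llvm;

void setVectorExample() {
  SetVector<int> Worklist;               // iteration follows insertion order
  bool Added = Worklist.insert(42);      // true: newly inserted
  bool AddedAgain = Worklist.insert(42); // false: duplicates are rejected
  (void)Added; (void)AddedAgain;
}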
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:418
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void reserve(size_type N)
Definition: SmallVector.h:664
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:579
typename SuperClass::iterator iterator
Definition: SmallVector.h:578
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
An instruction for storing to memory.
Definition: Instructions.h:296
void setAlignment(Align Align)
Definition: Instructions.h:342
Value * getValueOperand()
Definition: Instructions.h:383
static unsigned getPointerOperandIndex()
Definition: Instructions.h:388
Value * getPointerOperand()
Definition: Instructions.h:386
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Definition: Instructions.h:369
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:581
size_t rfind(char C, size_t From=npos) const
Search for the last character C in the string.
Definition: StringRef.h:353
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:301
static constexpr size_t npos
Definition: StringRef.h:57
LLVM_ABI size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C, or npos if not found.
Definition: StringRef.cpp:252
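A minimal sketch of the StringRef search and slicing calls above; the sample value and function name are made up for illustration:

#include "llvm/ADT/StringRef.h"
using namespace llvm;

void stringRefExample() {
  StringRef Name = "x.sroa.0.copyload";
  size_t FirstDot = Name.find('.');          // 1
  size_t LastDot = Name.rfind('.');          // index of the final '.'
  StringRef Root = Name.substr(0, FirstDot); // "x"
  bool HasDot = LastDot != StringRef::npos;  // true
  (void)Root; (void)HasDot;
}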
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:626
TypeSize getSizeInBytes() const
Definition: DataLayout.h:635
LLVM_ABI unsigned getElementContainingOffset(uint64_t FixedOffset) const
Given a valid byte offset into the structure, returns the structure index that contains it.
Definition: DataLayout.cpp:92
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:657
TypeSize getSizeInBits() const
Definition: DataLayout.h:637
Class to represent struct types.
Definition: DerivedTypes.h:218
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:414
element_iterator element_end() const
Definition: DerivedTypes.h:359
element_iterator element_begin() const
Definition: DerivedTypes.h:358
bool isPacked() const
Definition: DerivedTypes.h:286
Type * getElementType(unsigned N) const
Definition: DerivedTypes.h:369
Type::subtype_iterator element_iterator
Definition: DerivedTypes.h:356
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:264
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:246
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:267
Type * getArrayElementType() const
Definition: Type.h:408
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:296
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:261
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition: Type.h:203
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:270
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
LLVM_ABI unsigned getIntegerBitWidth() const
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:352
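A brief sketch of the integer-type queries above, assuming only an LLVMContext; the helper name is hypothetical:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

void integerTypeExample(LLVMContext &Ctx) {
  IntegerType *I24 = Type::getIntNTy(Ctx, 24); // arbitrary-width integer type
  unsigned Bits = I24->getIntegerBitWidth();   // 24
  bool IsInt = I24->isIntegerTy();             // true
  Type *Scalar = I24->getScalarType();         // I24 itself; not a vector
  (void)Bits; (void)IsInt; (void)Scalar;
}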
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1866
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
op_range operands()
Definition: User.h:292
op_iterator op_begin()
Definition: User.h:284
const Use & getOperandUse(unsigned i) const
Definition: User.h:245
Value * getOperand(unsigned i) const
Definition: User.h:232
op_iterator op_end()
Definition: User.h:286
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
user_iterator user_begin()
Definition: Value.h:402
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:546
LLVM_ABI const Value * stripInBoundsOffsets(function_ref< void(const Value *)> Func=[](const Value *) {}) const
Strip off pointer casts and inbounds GEPs.
Definition: Value.cpp:812
iterator_range< user_iterator > users()
Definition: Value.h:426
static LLVM_ABI void dropDroppableUse(Use &U)
Remove the droppable use U.
Definition: Value.cpp:226
LLVM_ABI void dropDroppableUsesIn(User &Usr)
Remove every use of this value in User that can safely be removed.
Definition: Value.cpp:218
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
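A hedged sketch of accumulating a constant pointer offset with the call above; getBaseAndOffset is a hypothetical helper, and the offset APInt is sized via DataLayout::getIndexTypeSizeInBits:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// Walks through constant GEPs (and similar casts) to the underlying base
// pointer, accumulating the constant byte offset into Offset.
const Value *getBaseAndOffset(const Value *Ptr, const DataLayout &DL,
                              APInt &Offset) {
  Offset = APInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
  return Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                /*AllowNonInbounds=*/true);
}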
bool use_empty() const
Definition: Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1098
iterator_range< use_iterator > uses()
Definition: Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:396
void setAborted(Instruction *I)
Mark the visit as aborted.
Definition: PtrUseVisitor.h:86
void setEscapedAndAborted(Instruction *I)
Mark the pointer as escaped, and the visit as aborted.
void setEscapedReadOnly(Instruction *I)
Mark the pointer as escaped into a readonly-nocapture call.
Definition: PtrUseVisitor.h:99
APInt Offset
The constant offset of the use if that is known.
void enqueueUsers(Value &I)
Enqueue the users of this instruction in the visit worklist.
bool IsOffsetKnown
True if we have a known constant offset for the use currently being visited.
PtrInfo PI
The info collected about the pointer being visited thus far.
Use * U
The use currently being visited.
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:203
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:172
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition: TypeSize.h:175
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:169
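A short sketch of fixed versus scalable TypeSize queries; the function name is hypothetical:

#include "llvm/Support/TypeSize.h"
using namespace llvm;

void typeSizeExample() {
  TypeSize Fixed = TypeSize::getFixed(16);
  TypeSize Scalable = TypeSize::getScalable(16); // 16 x vscale
  (void)Fixed.getFixedValue();                   // 16; only valid when !isScalable()
  (void)Scalable.isScalable();                   // true
  (void)Scalable.getKnownMinValue();             // 16: minimum regardless of vscale
}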
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:80
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
LLVM_ABI friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:226
LLVM_ABI friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Offsets
Offsets in bytes from the start of the input buffer.
Definition: SIInstrInfo.h:1695
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition: DebugInfo.h:201
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
Definition: DebugInfo.cpp:1899
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
@ DW_OP_LLVM_extract_bits_zext
Only used in LLVM metadata.
Definition: Dwarf.h:150
@ DW_OP_LLVM_fragment
Only used in LLVM metadata.
Definition: Dwarf.h:143
@ DW_OP_LLVM_extract_bits_sext
Only used in LLVM metadata.
Definition: Dwarf.h:149
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
LLVM_ABI const_iterator begin(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
LLVM_ABI const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
Definition: Path.cpp:235
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition: DWP.cpp:477
@ Length
Definition: DWP.cpp:477
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
void stable_sort(R &&Range)
Definition: STLExtras.h:2077
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1737
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1702
LLVM_ABI void PromoteMemToReg(ArrayRef< AllocaInst * > Allocas, DominatorTree &DT, AssumptionCache *AC=nullptr)
Promote the specified list of alloca instructions into scalar registers, inserting PHI nodes as appro...
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
auto successors(const MachineBasicBlock *BB)
void * PointerTy
Definition: GenericValue.h:21
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2113
LLVM_ABI void copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source)
Copy the metadata from the source instruction to the destination (the replacement for the source inst...
Definition: Local.cpp:3090
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< RegOrConstant > getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1493
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2095
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI bool isAllocaPromotable(const AllocaInst *AI)
Return true if this alloca is legal for promotion.
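A hedged sketch combining isAllocaPromotable and PromoteMemToReg (both declared above); promoteEntryAllocas is a hypothetical driver, not the pass itself:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
using namespace llvm;

// Collect promotable allocas from the entry block and rewrite them into SSA
// values, inserting PHI nodes where needed.
void promoteEntryAllocas(Function &F, DominatorTree &DT) {
  SmallVector<AllocaInst *, 8> Allocas;
  for (Instruction &I : F.getEntryBlock())
    if (auto *AI = dyn_cast<AllocaInst>(&I))
      if (isAllocaPromotable(AI))
        Allocas.push_back(AI);
  if (!Allocas.empty())
    PromoteMemToReg(Allocas, DT);
}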
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:402
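A minimal sketch combining make_early_inc_range with isInstructionTriviallyDead so instructions can be erased during iteration; removeDeadInsts is a hypothetical helper:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

void removeDeadInsts(BasicBlock &BB) {
  // The early-increment range advances before yielding each instruction, so
  // erasing the current one does not invalidate the traversal.
  for (Instruction &I : make_early_inc_range(BB))
    if (isInstructionTriviallyDead(&I))
      I.eraseFromParent();
}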
bool capturesFullProvenance(CaptureComponents CC)
Definition: ModRef.h:336
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
Definition: Loads.cpp:431
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
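A small sketch of debug-only output via dbgs() and LLVM_DEBUG; the DEBUG_TYPE string "sroa-example" and the function name are hypothetical:

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define DEBUG_TYPE "sroa-example"

void reportSliceCount(unsigned NumSlices) {
  // Emitted only in builds with debug output enabled, and only when -debug
  // or -debug-only=sroa-example is passed.
  LLVM_DEBUG(dbgs() << "found " << NumSlices << " slices\n");
}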
LLVM_ABI void initializeSROALegacyPassPass(PassRegistry &)
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRValues(Value *V)
As above, for DVRValues.
Definition: DebugInfo.cpp:65
LLVM_ABI void llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr, unsigned line=0)
This function calls abort(), and prints the optional message to stderr.
LLVM_ABI bool isAssignmentTrackingEnabled(const Module &M)
Return true if assignment tracking is enabled for module M.
Definition: DebugInfo.cpp:2259
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2139
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRDeclares(Value *V)
Finds dbg.declare records declaring local variables as living in the memory that 'V' points to.
Definition: DebugInfo.cpp:48
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
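A short sketch of commonAlignment, which clamps an alignment to what still holds at a byte offset from the aligned base; the function name is hypothetical:

#include "llvm/Support/Alignment.h"
using namespace llvm;

void alignmentExample() {
  Align Base(16);
  Align AtOff8 = commonAlignment(Base, 8);   // Align(8): 8 bytes past a 16-aligned base
  Align AtOff32 = commonAlignment(Base, 32); // Align(16): still 16-byte aligned
  (void)AtOff8; (void)AtOff32;
}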
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI FunctionPass * createSROAPass(bool PreserveCFG=true)
Definition: SROA.cpp:5805
SROAOptions
Definition: SROA.h:24
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
#define NDEBUG
Definition: regutils.h:48
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
AAMDNodes shift(size_t Offset) const
Create a new AAMDNode that describes this AAMDNode after applying a constant offset to the start of t...
Definition: Metadata.h:819
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Describes an element of a Bitfield.
Definition: Bitfields.h:223
Holds functions to get, set or test bitfields.
Definition: Bitfields.h:212
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:70
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:249