78#define DEBUG_TYPE "interleaved-access"
81 "lower-interleaved-accesses",
82 cl::desc(
"Enable lowering interleaved accesses to intrinsics"),
87class InterleavedAccessImpl {
88 friend class InterleavedAccess;
91 InterleavedAccessImpl() =
default;
93 : DT(DT), TLI(TLI), MaxFactor(TLI->getMaxSupportedInterleaveFactor()) {}
97 DominatorTree *DT =
nullptr;
98 const TargetLowering *TLI =
nullptr;
101 unsigned MaxFactor = 0
u;
104 bool lowerInterleavedLoad(Instruction *Load,
105 SmallSetVector<Instruction *, 32> &DeadInsts);
108 bool lowerInterleavedStore(Instruction *Store,
109 SmallSetVector<Instruction *, 32> &DeadInsts);
113 bool lowerDeinterleaveIntrinsic(IntrinsicInst *
II,
114 SmallSetVector<Instruction *, 32> &DeadInsts);
118 bool lowerInterleaveIntrinsic(IntrinsicInst *
II,
119 SmallSetVector<Instruction *, 32> &DeadInsts);
134 SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
139 InterleavedAccessImpl Impl;
144 InterleavedAccess() : FunctionPass(ID) {
// Pass-name accessor override: returns the fixed string literal
// "Interleaved Access Pass" (the same accessor is streamed into the debug
// banner in runOnFunction).
148 StringRef getPassName()
const override {
return "Interleaved Access Pass"; }
152 void getAnalysisUsage(AnalysisUsage &AU)
const override {
163 auto *TLI = TM->getSubtargetImpl(
F)->getTargetLowering();
164 InterleavedAccessImpl Impl(DT, TLI);
165 bool Changed = Impl.runOnFunction(
F);
175char InterleavedAccess::ID = 0;
177bool InterleavedAccess::runOnFunction(
Function &
F) {
181 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
185 LLVM_DEBUG(
dbgs() <<
"*** " << getPassName() <<
": " <<
F.getName() <<
"\n");
187 Impl.DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
189 Impl.TLI = TM.getSubtargetImpl(
F)->getTargetLowering();
190 Impl.MaxFactor = Impl.TLI->getMaxSupportedInterleaveFactor();
192 return Impl.runOnFunction(
F);
196 "Lower interleaved memory accesses to target specific intrinsics",
false,
200 "Lower interleaved memory accesses to target specific intrinsics",
false,
204 return new InterleavedAccess();
213 unsigned &Index,
unsigned MaxFactor,
214 unsigned NumLoadElements) {
219 for (Factor = 2; Factor <= MaxFactor; Factor++) {
221 if (Mask.size() * Factor > NumLoadElements)
242 unsigned MaxFactor) {
248 for (Factor = 2; Factor <= MaxFactor; Factor++) {
257 switch (
II->getIntrinsicID()) {
260 case Intrinsic::vp_load:
261 return II->getOperand(1);
262 case Intrinsic::masked_load:
263 return II->getOperand(2);
264 case Intrinsic::vp_store:
265 return II->getOperand(2);
266 case Intrinsic::masked_store:
267 return II->getOperand(3);
276static std::pair<Value *, APInt>
getMask(
Value *WideMask,
unsigned Factor,
279static std::pair<Value *, APInt>
getMask(
Value *WideMask,
unsigned Factor,
281 return getMask(WideMask, Factor, LeafValueTy->getElementCount());
284bool InterleavedAccessImpl::lowerInterleavedLoad(
294 if (LI && !LI->isSimple())
316 if (!BI->user_empty() &&
318 for (
auto *SVI : BI->users())
330 if (Shuffles.
empty() && BinOpShuffles.
empty())
333 unsigned Factor,
Index;
335 unsigned NumLoadElements =
337 auto *FirstSVI = Shuffles.
size() > 0 ? Shuffles[0] : BinOpShuffles[0];
350 for (
auto *Shuffle : Shuffles) {
351 if (Shuffle->getType() != VecTy)
354 Shuffle->getShuffleMask(), Factor, Index))
357 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
360 for (
auto *Shuffle : BinOpShuffles) {
361 if (Shuffle->getType() != VecTy)
364 Shuffle->getShuffleMask(), Factor, Index))
367 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
377 if (!tryReplaceExtracts(Extracts, Shuffles))
380 bool BinOpShuffleChanged =
381 replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, Load);
386 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved load: " << *Load <<
"\n");
393 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved vp.load or masked.load: "
396 <<
" and actual factor " << GapMask.popcount() <<
"\n");
402 Indices, Factor, GapMask))
404 return !Extracts.
empty() || BinOpShuffleChanged;
412bool InterleavedAccessImpl::replaceBinOpShuffles(
415 for (
auto *SVI : BinOpShuffles) {
420 return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
426 Mask, SVI->getName(), insertPos);
429 SVI->getName(), insertPos);
432 SVI->replaceAllUsesWith(NewBI);
434 <<
"\n With : " << *NewSVI1 <<
"\n And : "
435 << *NewSVI2 <<
"\n And : " << *NewBI <<
"\n");
437 if (NewSVI1->getOperand(0) == Load)
439 if (NewSVI2->getOperand(0) == Load)
443 return !BinOpShuffles.empty();
446bool InterleavedAccessImpl::tryReplaceExtracts(
451 if (Extracts.
empty())
458 for (
auto *Extract : Extracts) {
461 auto Index = IndexOperand->getSExtValue();
466 for (
auto *Shuffle : Shuffles) {
469 if (!DT->dominates(Shuffle, Extract))
476 Shuffle->getShuffleMask(Indices);
477 for (
unsigned I = 0;
I < Indices.
size(); ++
I)
478 if (Indices[
I] == Index) {
479 assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
480 "Vector operations do not match");
481 ReplacementMap[Extract] = std::make_pair(Shuffle,
I);
486 if (ReplacementMap.
count(Extract))
492 if (!ReplacementMap.
count(Extract))
498 for (
auto &Replacement : ReplacementMap) {
499 auto *Extract = Replacement.first;
500 auto *
Vector = Replacement.second.first;
501 auto Index = Replacement.second.second;
502 Builder.SetInsertPoint(Extract);
503 Extract->replaceAllUsesWith(Builder.CreateExtractElement(
Vector, Index));
504 Extract->eraseFromParent();
510bool InterleavedAccessImpl::lowerInterleavedStore(
518 StoredValue =
SI->getValueOperand();
520 assert(
II->getIntrinsicID() == Intrinsic::vp_store ||
521 II->getIntrinsicID() == Intrinsic::masked_store);
522 StoredValue =
II->getArgOperand(0);
529 unsigned NumStoredElements =
535 assert(NumStoredElements % Factor == 0 &&
536 "number of stored element should be a multiple of Factor");
541 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved store: " << *Store <<
"\n");
544 unsigned LaneMaskLen = NumStoredElements / Factor;
550 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved vp.store or masked.store: "
553 <<
" and actual factor " << GapMask.popcount() <<
"\n");
558 if (!TLI->lowerInterleavedStore(Store, Mask, SVI, Factor, GapMask))
573 unsigned LeafMaskLen,
APInt &GapMask) {
575 for (
unsigned F = 0U;
F < Factor; ++
F) {
577 for (
unsigned Idx = 0U; Idx < LeafMaskLen; ++Idx) {
579 if (!
C->isZeroValue()) {
590static std::pair<Value *, APInt>
getMask(
Value *WideMask,
unsigned Factor,
597 Value *RefArg =
nullptr;
600 for (
auto [Idx, Arg] :
enumerate(IMI->args())) {
602 GapMask.clearBit(Idx);
608 else if (RefArg != Arg)
609 return {
nullptr, GapMask};
615 return {RefArg ? RefArg : IMI->getArgOperand(0), GapMask};
621 AndOp && AndOp->getOpcode() == Instruction::And) {
622 auto [MaskLHS, GapMaskLHS] =
623 getMask(AndOp->getOperand(0), Factor, LeafValueEC);
624 auto [MaskRHS, GapMaskRHS] =
625 getMask(AndOp->getOperand(1), Factor, LeafValueEC);
626 if (!MaskLHS || !MaskRHS)
627 return {
nullptr, GapMask};
630 return {
IRBuilder<>(AndOp).CreateAnd(MaskLHS, MaskRHS),
631 GapMaskLHS & GapMaskRHS};
635 if (
auto *
Splat = ConstMask->getSplatValue())
642 getGapMask(*ConstMask, Factor, LeafMaskLen, GapMask);
648 for (
unsigned Idx = 0U; Idx < LeafMaskLen * Factor; ++Idx) {
649 if (!GapMask[Idx % Factor])
651 Constant *
C = ConstMask->getAggregateElement(Idx);
652 if (LeafMask[Idx / Factor] && LeafMask[Idx / Factor] !=
C)
653 return {
nullptr, GapMask};
654 LeafMask[Idx / Factor] =
C;
662 Type *Op1Ty = SVI->getOperand(1)->getType();
664 return {
nullptr, GapMask};
669 unsigned NumSrcElts =
673 NumSrcElts * 2, StartIndexes) &&
674 llvm::all_of(StartIndexes, [](
unsigned Start) { return Start == 0; }) &&
675 llvm::all_of(SVI->getShuffleMask(), [&NumSrcElts](
int Idx) {
676 return Idx < (int)NumSrcElts;
681 return {Builder.CreateExtractVector(LeafMaskTy, SVI->getOperand(0),
687 return {
nullptr, GapMask};
690bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
693 if (!LoadedVal || !LoadedVal->
hasOneUse())
702 assert(Factor &&
"unexpected deinterleave intrinsic");
709 LLVM_DEBUG(
dbgs() <<
"IA: Found a load with deinterleave intrinsic " << *DI
710 <<
" and factor = " << Factor <<
"\n");
713 if (
II->getIntrinsicID() != Intrinsic::masked_load &&
714 II->getIntrinsicID() != Intrinsic::vp_load)
718 APInt GapMask(Factor, 0);
719 std::tie(Mask, GapMask) =
726 if (GapMask.popcount() != Factor)
729 LLVM_DEBUG(
dbgs() <<
"IA: Found a vp.load or masked.load with deinterleave"
730 <<
" intrinsic " << *DI <<
" and factor = "
735 if (!TLI->lowerDeinterleaveIntrinsicToLoad(LoadedVal, Mask, DI))
740 DeadInsts.
insert(LoadedVal);
744bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
758 assert(Factor &&
"unexpected interleave intrinsic");
762 if (
II->getIntrinsicID() != Intrinsic::masked_store &&
763 II->getIntrinsicID() != Intrinsic::vp_store)
766 APInt GapMask(Factor, 0);
767 std::tie(Mask, GapMask) =
774 if (GapMask.popcount() != Factor)
777 LLVM_DEBUG(
dbgs() <<
"IA: Found a vp.store or masked.store with interleave"
778 <<
" intrinsic " << *IntII <<
" and factor = "
784 LLVM_DEBUG(
dbgs() <<
"IA: Found a store with interleave intrinsic "
785 << *IntII <<
" and factor = " << Factor <<
"\n");
789 if (!TLI->lowerInterleaveIntrinsicToStore(StoredBy, Mask, InterleaveValues))
793 DeadInsts.
insert(StoredBy);
798bool InterleavedAccessImpl::runOnFunction(
Function &
F) {
808 Changed |= lowerInterleavedLoad(&
I, DeadInsts);
813 Changed |= lowerInterleavedStore(&
I, DeadInsts);
817 Changed |= lowerDeinterleaveIntrinsic(
II, DeadInsts);
819 Changed |= lowerInterleaveIntrinsic(
II, DeadInsts);
823 for (
auto *
I : DeadInsts)
824 I->eraseFromParent();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Expand Atomic instructions
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
static bool isDeInterleaveMask(ArrayRef< int > Mask, unsigned &Factor, unsigned &Index, unsigned MaxFactor, unsigned NumLoadElements)
Check if the mask is a DE-interleave mask for an interleaved load.
static void getGapMask(const Constant &MaskConst, unsigned Factor, unsigned LeafMaskLen, APInt &GapMask)
static cl::opt< bool > LowerInterleavedAccesses("lower-interleaved-accesses", cl::desc("Enable lowering interleaved accesses to intrinsics"), cl::init(true), cl::Hidden)
static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, unsigned MaxFactor)
Check if the mask can be used in an interleaved store.
static Value * getMaskOperand(IntrinsicInst *II)
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
This file contains the declaration of the InterleavedAccessPass class, its corresponding pass name is...
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
unsigned getBitWidth() const
Return the number of bits in the APInt.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Represents analyses that only rely on functions' control flow.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getFixed(ScalarTy MinVal)
FunctionPass class - This class is used to implement most global optimizations.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
void insert_range(Range &&R)
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
LLVM_ABI bool isInterleave(unsigned Factor)
Return true if this shuffle interleaves its two input vectors together.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Base class of all SIMD vector types.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
TwoOps_match< ValueOpTy, PointerOpTy, Instruction::Store > m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp)
Matches StoreInst.
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_Undef()
Match an arbitrary undef constant.
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
Context & getContext() const
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void initializeInterleavedAccessPass(PassRegistry &)
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI FunctionPass * createInterleavedAccessPass()
InterleavedAccess Pass - This pass identifies and matches interleaved memory accesses to target speci...
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI VectorType * getDeinterleavedVectorType(IntrinsicInst *DI)
Given a deinterleaveN intrinsic, return the (narrow) vector type of each factor.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.