78#define DEBUG_TYPE "interleaved-access"
81 "lower-interleaved-accesses",
82 cl::desc(
"Enable lowering interleaved accesses to intrinsics"),
87class InterleavedAccessImpl {
88 friend class InterleavedAccess;
91 InterleavedAccessImpl() =
default;
93 : DT(DT), TLI(TLI), MaxFactor(TLI->getMaxSupportedInterleaveFactor()) {}
101 unsigned MaxFactor = 0
u;
139 InterleavedAccessImpl Impl;
164 InterleavedAccessImpl Impl(DT, TLI);
165 bool Changed = Impl.runOnFunction(
F);
175char InterleavedAccess::ID = 0;
177bool InterleavedAccess::runOnFunction(
Function &
F) {
181 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
185 LLVM_DEBUG(
dbgs() <<
"*** " << getPassName() <<
": " <<
F.getName() <<
"\n");
187 Impl.DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
189 Impl.TLI = TM.getSubtargetImpl(
F)->getTargetLowering();
190 Impl.MaxFactor = Impl.TLI->getMaxSupportedInterleaveFactor();
192 return Impl.runOnFunction(
F);
196 "Lower interleaved memory accesses to target specific intrinsics",
false,
204 return new InterleavedAccess();
213 unsigned &Index,
unsigned MaxFactor,
214 unsigned NumLoadElements) {
219 for (Factor = 2; Factor <= MaxFactor; Factor++) {
221 if (Mask.size() * Factor > NumLoadElements)
242 unsigned MaxFactor) {
248 for (Factor = 2; Factor <= MaxFactor; Factor++) {
257 switch (
II->getIntrinsicID()) {
260 case Intrinsic::vp_load:
261 return II->getOperand(1);
262 case Intrinsic::masked_load:
263 return II->getOperand(2);
264 case Intrinsic::vp_store:
265 return II->getOperand(2);
266 case Intrinsic::masked_store:
267 return II->getOperand(3);
276static std::pair<Value *, APInt>
getMask(
Value *WideMask,
unsigned Factor,
279static std::pair<Value *, APInt>
getMask(
Value *WideMask,
unsigned Factor,
281 return getMask(WideMask, Factor, LeafValueTy->getElementCount());
284bool InterleavedAccessImpl::lowerInterleavedLoad(
286 if (isa<ScalableVectorType>(
Load->getType()))
289 auto *LI = dyn_cast<LoadInst>(Load);
290 auto *
II = dyn_cast<IntrinsicInst>(Load);
294 if (LI && !LI->isSimple())
309 auto *Extract = dyn_cast<ExtractElementInst>(
User);
310 if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
314 if (
auto *BI = dyn_cast<BinaryOperator>(
User)) {
315 if (!BI->user_empty() &&
all_of(BI->users(), [](
auto *U) {
316 auto *SVI = dyn_cast<ShuffleVectorInst>(U);
317 return SVI && isa<UndefValue>(SVI->getOperand(1));
319 for (
auto *SVI : BI->users())
320 BinOpShuffles.
insert(cast<ShuffleVectorInst>(SVI));
324 auto *SVI = dyn_cast<ShuffleVectorInst>(
User);
325 if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
331 if (Shuffles.
empty() && BinOpShuffles.
empty())
334 unsigned Factor,
Index;
336 unsigned NumLoadElements =
337 cast<FixedVectorType>(
Load->getType())->getNumElements();
338 auto *FirstSVI = Shuffles.
size() > 0 ? Shuffles[0] : BinOpShuffles[0];
347 VectorType *VecTy = cast<VectorType>(FirstSVI->getType());
351 for (
auto *Shuffle : Shuffles) {
352 if (Shuffle->getType() != VecTy)
355 Shuffle->getShuffleMask(), Factor, Index))
358 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
361 for (
auto *Shuffle : BinOpShuffles) {
362 if (Shuffle->getType() != VecTy)
365 Shuffle->getShuffleMask(), Factor, Index))
368 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
370 if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == Load)
372 if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == Load)
378 if (!tryReplaceExtracts(Extracts, Shuffles))
381 bool BinOpShuffleChanged =
382 replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, Load);
387 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved load: " << *Load <<
"\n");
394 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved vp.load or masked.load: "
397 <<
" and actual factor " << GapMask.popcount() <<
"\n");
402 if (!TLI->lowerInterleavedLoad(cast<Instruction>(Load), Mask, Shuffles,
403 Indices, Factor, GapMask))
405 return !Extracts.
empty() || BinOpShuffleChanged;
413bool InterleavedAccessImpl::replaceBinOpShuffles(
416 for (
auto *SVI : BinOpShuffles) {
421 return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
427 Mask, SVI->
getName(), insertPos);
433 SVI->replaceAllUsesWith(NewBI);
435 <<
"\n With : " << *NewSVI1 <<
"\n And : "
436 << *NewSVI2 <<
"\n And : " << *NewBI <<
"\n");
438 if (NewSVI1->getOperand(0) == Load)
440 if (NewSVI2->getOperand(0) == Load)
444 return !BinOpShuffles.empty();
447bool InterleavedAccessImpl::tryReplaceExtracts(
452 if (Extracts.
empty())
459 for (
auto *Extract : Extracts) {
461 auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
462 auto Index = IndexOperand->getSExtValue();
467 for (
auto *Shuffle : Shuffles) {
477 Shuffle->getShuffleMask(Indices);
478 for (
unsigned I = 0;
I < Indices.
size(); ++
I)
479 if (Indices[
I] == Index) {
480 assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
481 "Vector operations do not match");
482 ReplacementMap[Extract] = std::make_pair(Shuffle,
I);
487 if (ReplacementMap.
count(Extract))
493 if (!ReplacementMap.
count(Extract))
499 for (
auto &Replacement : ReplacementMap) {
500 auto *Extract = Replacement.first;
501 auto *
Vector = Replacement.second.first;
502 auto Index = Replacement.second.second;
503 Builder.SetInsertPoint(Extract);
504 Extract->replaceAllUsesWith(Builder.CreateExtractElement(
Vector, Index));
505 Extract->eraseFromParent();
511bool InterleavedAccessImpl::lowerInterleavedStore(
514 auto *
SI = dyn_cast<StoreInst>(Store);
515 auto *
II = dyn_cast<IntrinsicInst>(Store);
519 StoredValue =
SI->getValueOperand();
521 assert(
II->getIntrinsicID() == Intrinsic::vp_store ||
522 II->getIntrinsicID() == Intrinsic::masked_store);
523 StoredValue =
II->getArgOperand(0);
526 auto *SVI = dyn_cast<ShuffleVectorInst>(StoredValue);
527 if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
530 unsigned NumStoredElements =
531 cast<FixedVectorType>(SVI->getType())->getNumElements();
536 assert(NumStoredElements % Factor == 0 &&
537 "number of stored element should be a multiple of Factor");
542 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved store: " << *Store <<
"\n");
545 unsigned LaneMaskLen = NumStoredElements / Factor;
551 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved vp.store or masked.store: "
554 <<
" and actual factor " << GapMask.popcount() <<
"\n");
559 if (!TLI->lowerInterleavedStore(Store, Mask, SVI, Factor, GapMask))
574 unsigned LeafMaskLen,
APInt &GapMask) {
576 for (
unsigned F = 0U;
F < Factor; ++
F) {
578 for (
unsigned Idx = 0U;
Idx < LeafMaskLen; ++
Idx) {
580 if (!
C->isZeroValue()) {
591static std::pair<Value *, APInt>
getMask(
Value *WideMask,
unsigned Factor,
595 if (
auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
598 Value *RefArg =
nullptr;
602 if (
auto *
C = dyn_cast<Constant>(Arg);
C &&
C->isZeroValue()) {
603 GapMask.clearBit(
Idx);
609 else if (RefArg != Arg)
610 return {
nullptr, GapMask};
616 return {RefArg ? RefArg : IMI->getArgOperand(0), GapMask};
621 if (
auto *AndOp = dyn_cast<BinaryOperator>(WideMask);
622 AndOp && AndOp->getOpcode() == Instruction::And) {
623 auto [MaskLHS, GapMaskLHS] =
624 getMask(AndOp->getOperand(0), Factor, LeafValueEC);
625 auto [MaskRHS, GapMaskRHS] =
626 getMask(AndOp->getOperand(1), Factor, LeafValueEC);
627 if (!MaskLHS || !MaskRHS)
628 return {
nullptr, GapMask};
632 GapMaskLHS & GapMaskRHS};
635 if (
auto *ConstMask = dyn_cast<Constant>(WideMask)) {
636 if (
auto *
Splat = ConstMask->getSplatValue())
643 getGapMask(*ConstMask, Factor, LeafMaskLen, GapMask);
649 for (
unsigned Idx = 0U;
Idx < LeafMaskLen * Factor; ++
Idx) {
650 if (!GapMask[
Idx % Factor])
653 if (LeafMask[
Idx / Factor] && LeafMask[
Idx / Factor] !=
C)
654 return {
nullptr, GapMask};
655 LeafMask[
Idx / Factor] =
C;
662 if (
auto *SVI = dyn_cast<ShuffleVectorInst>(WideMask)) {
666 unsigned NumSrcElts =
667 cast<FixedVectorType>(SVI->getOperand(1)->getType())->getNumElements();
670 NumSrcElts * 2, StartIndexes) &&
671 llvm::all_of(StartIndexes, [](
unsigned Start) { return Start == 0; }) &&
673 return Idx < (int)NumSrcElts;
684 return {
nullptr, GapMask};
687bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
690 if (!LoadedVal || !LoadedVal->
hasOneUse())
693 auto *LI = dyn_cast<LoadInst>(LoadedVal);
694 auto *
II = dyn_cast<IntrinsicInst>(LoadedVal);
699 assert(Factor &&
"unexpected deinterleave intrinsic");
706 LLVM_DEBUG(
dbgs() <<
"IA: Found a load with deinterleave intrinsic " << *DI
707 <<
" and factor = " << Factor <<
"\n");
710 if (
II->getIntrinsicID() != Intrinsic::masked_load &&
711 II->getIntrinsicID() != Intrinsic::vp_load)
715 APInt GapMask(Factor, 0);
716 std::tie(Mask, GapMask) =
723 if (GapMask.popcount() != Factor)
726 LLVM_DEBUG(
dbgs() <<
"IA: Found a vp.load or masked.load with deinterleave"
727 <<
" intrinsic " << *DI <<
" and factor = "
732 if (!TLI->lowerDeinterleaveIntrinsicToLoad(LoadedVal, Mask, DI))
737 DeadInsts.
insert(LoadedVal);
741bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
748 auto *
SI = dyn_cast<StoreInst>(StoredBy);
749 auto *
II = dyn_cast<IntrinsicInst>(StoredBy);
755 assert(Factor &&
"unexpected interleave intrinsic");
759 if (
II->getIntrinsicID() != Intrinsic::masked_store &&
760 II->getIntrinsicID() != Intrinsic::vp_store)
763 APInt GapMask(Factor, 0);
764 std::tie(Mask, GapMask) =
766 cast<VectorType>(InterleaveValues[0]->
getType()));
771 if (GapMask.popcount() != Factor)
774 LLVM_DEBUG(
dbgs() <<
"IA: Found a vp.store or masked.store with interleave"
775 <<
" intrinsic " << *IntII <<
" and factor = "
781 LLVM_DEBUG(
dbgs() <<
"IA: Found a store with interleave intrinsic "
782 << *IntII <<
" and factor = " << Factor <<
"\n");
786 if (!TLI->lowerInterleaveIntrinsicToStore(StoredBy, Mask, InterleaveValues))
790 DeadInsts.
insert(StoredBy);
795bool InterleavedAccessImpl::runOnFunction(
Function &
F) {
798 bool Changed =
false;
800 using namespace PatternMatch;
803 m_Intrinsic<Intrinsic::vp_load>())) ||
804 match(&
I, m_Intrinsic<Intrinsic::masked_load>()))
805 Changed |= lowerInterleavedLoad(&
I, DeadInsts);
808 m_Intrinsic<Intrinsic::vp_store>())) ||
809 match(&
I, m_Intrinsic<Intrinsic::masked_store>()))
810 Changed |= lowerInterleavedStore(&
I, DeadInsts);
812 if (
auto *
II = dyn_cast<IntrinsicInst>(&
I)) {
814 Changed |= lowerDeinterleaveIntrinsic(
II, DeadInsts);
816 Changed |= lowerInterleaveIntrinsic(
II, DeadInsts);
820 for (
auto *
I : DeadInsts)
821 I->eraseFromParent();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Expand Atomic instructions
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
expand Expand reduction intrinsics
static bool isDeInterleaveMask(ArrayRef< int > Mask, unsigned &Factor, unsigned &Index, unsigned MaxFactor, unsigned NumLoadElements)
Check if the mask is a DE-interleave mask for an interleaved load.
static void getGapMask(const Constant &MaskConst, unsigned Factor, unsigned LeafMaskLen, APInt &GapMask)
static cl::opt< bool > LowerInterleavedAccesses("lower-interleaved-accesses", cl::desc("Enable lowering interleaved accesses to intrinsics"), cl::init(true), cl::Hidden)
static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, unsigned MaxFactor)
Check if the mask can be used in an interleaved store.
static Value * getMaskOperand(IntrinsicInst *II)
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
This file contains the declaration of the InterleavedAccessPass class, its corresponding pass name is...
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
unsigned getBitWidth() const
Return the number of bits in the APInt.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Represents analyses that only rely on functions' control flow.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
static constexpr ElementCount getFixed(ScalarTy MinVal)
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
CallInst * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")
Create a call to the vector.extract intrinsic.
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
void insert_range(Range &&R)
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
LLVM_ABI bool isInterleave(unsigned Factor)
Return if this shuffle interleaves its two input vectors together.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
TwoOps_match< ValueOpTy, PointerOpTy, Instruction::Store > m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp)
Matches StoreInst.
bool match(Val *V, const Pattern &P)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
InterleavedRange< Range > interleaved(const Range &R, StringRef Separator=", ", StringRef Prefix="", StringRef Suffix="")
Output range R as a sequence of interleaved elements.
LLVM_ABI void initializeInterleavedAccessPass(PassRegistry &)
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
LLVM_ABI FunctionPass * createInterleavedAccessPass()
InterleavedAccess Pass - This pass identifies and matches interleaved memory accesses to target speci...
LLVM_ABI VectorType * getDeinterleavedVectorType(IntrinsicInst *DI)
Given a deinterleaveN intrinsic, return the (narrow) vector type of each factor.