//===- InterleavedAccessPass.cpp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Interleaved Access pass, which identifies
// interleaved memory accesses and transforms them into target specific
// intrinsics.
//
// An interleaved load reads data from memory into several vectors,
// DE-interleaving the data by a factor. An interleaved store writes several
// vectors to memory, RE-interleaving the data by a factor.
//
// As interleaved accesses are difficult to identify in CodeGen (mainly
// because the VECTOR_SHUFFLE DAG node is quite different from the
// shufflevector IR), we identify and transform them to intrinsics in this
// pass so the intrinsics can be easily matched into target specific
// instructions later in CodeGen.
//
// E.g. An interleaved load (Factor = 2):
//        %wide.vec = load <8 x i32>, <8 x i32>* %ptr
//        %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <0, 2, 4, 6>
//        %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <1, 3, 5, 7>
//
// It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2
// intrinsic in ARM backend.
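//
// As an illustrative sketch (assuming an AArch64 target; not the literal
// output of this pass), the pair of shuffles above may end up as:
//        %ldN = call { <4 x i32>, <4 x i32> }
//                    @llvm.aarch64.neon.ld2.v4i32.p0(ptr %ptr)
//        %v0 = extractvalue { <4 x i32>, <4 x i32> } %ldN, 0
//        %v1 = extractvalue { <4 x i32>, <4 x i32> } %ldN, 1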
//
// In X86, this can be further optimized into a set of target
// specific loads followed by an optimized sequence of shuffles.
//
// E.g. An interleaved store (Factor = 3):
//        %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
//                         <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
//        store <12 x i32> %i.vec, <12 x i32>* %ptr
//
// It could be transformed into a st3 intrinsic in AArch64 backend or a vst3
// intrinsic in ARM backend.
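//
// Again as a sketch (assuming AArch64), the lowering splits the wide shuffle
// into per-field subvectors feeding a st3:
//        %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> %v1, <0, 1, 2, 3>
//        %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> %v1, <4, 5, 6, 7>
//        %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> %v1, <8, 9, 10, 11>
//        call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %sub.v0,
//                  <4 x i32> %sub.v1, <4 x i32> %sub.v2, ptr %ptr)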
//
// Similarly, a set of interleaved stores can be transformed into an optimized
// sequence of shuffles followed by a set of target specific stores for X86.
//
//===----------------------------------------------------------------------===//
46
47#include "llvm/ADT/ArrayRef.h"
48#include "llvm/ADT/DenseMap.h"
49#include "llvm/ADT/SetVector.h"
56#include "llvm/IR/Constants.h"
57#include "llvm/IR/Dominators.h"
58#include "llvm/IR/Function.h"
59#include "llvm/IR/IRBuilder.h"
61#include "llvm/IR/Instruction.h"
66#include "llvm/Pass.h"
69#include "llvm/Support/Debug.h"
73#include <cassert>
74#include <utility>
75
using namespace llvm;

#define DEBUG_TYPE "interleaved-access"

static cl::opt<bool> LowerInterleavedAccesses(
    "lower-interleaved-accesses",
    cl::desc("Enable lowering interleaved accesses to intrinsics"),
    cl::init(true), cl::Hidden);

namespace {

class InterleavedAccessImpl {
  friend class InterleavedAccess;

public:
  InterleavedAccessImpl() = default;
  InterleavedAccessImpl(DominatorTree *DT, const TargetLowering *TLI)
      : DT(DT), TLI(TLI), MaxFactor(TLI->getMaxSupportedInterleaveFactor()) {}
  bool runOnFunction(Function &F);

private:
  DominatorTree *DT = nullptr;
  const TargetLowering *TLI = nullptr;

  /// The maximum supported interleave factor.
  unsigned MaxFactor = 0u;

  /// Transform an interleaved load into target specific intrinsics.
  bool lowerInterleavedLoad(Instruction *Load,
                            SmallSetVector<Instruction *, 32> &DeadInsts);

  /// Transform an interleaved store into target specific intrinsics.
  bool lowerInterleavedStore(Instruction *Store,
                             SmallSetVector<Instruction *, 32> &DeadInsts);

  /// Transform a load and a deinterleave intrinsic into target specific
  /// instructions.
  bool lowerDeinterleaveIntrinsic(IntrinsicInst *II,
                                  SmallSetVector<Instruction *, 32> &DeadInsts);

  /// Transform an interleave intrinsic and a store into target specific
  /// instructions.
  bool lowerInterleaveIntrinsic(IntrinsicInst *II,
                                SmallSetVector<Instruction *, 32> &DeadInsts);

  /// Returns true if the uses of an interleaved load by the
  /// extractelement instructions in \p Extracts can be replaced by uses of the
  /// shufflevector instructions in \p Shuffles instead. If so, the necessary
  /// replacements are also performed.
  bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
                          ArrayRef<ShuffleVectorInst *> Shuffles);

  /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them
  /// to binop(shuffle(x), shuffle(y)) to allow the formation of an
  /// interleaving load. Any newly created shuffles that operate on \p LI will
  /// be added to \p Shuffles. Returns true if any changes to the IR have been
  /// made.
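  ///
  /// As an illustrative IR sketch (not taken verbatim from the pass), a
  /// de-interleaving shuffle of a binop:
  ///   %add = add <8 x i32> %x, %y
  ///   %s0  = shufflevector <8 x i32> %add, <8 x i32> poison, <0, 2, 4, 6>
  /// is rewritten into shuffles of the operands feeding a narrower binop:
  ///   %x0  = shufflevector <8 x i32> %x, <8 x i32> poison, <0, 2, 4, 6>
  ///   %y0  = shufflevector <8 x i32> %y, <8 x i32> poison, <0, 2, 4, 6>
  ///   %s0  = add <4 x i32> %x0, %y0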
  bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
                            SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
                            Instruction *LI);
};

class InterleavedAccess : public FunctionPass {
  InterleavedAccessImpl Impl;

public:
  static char ID;

  InterleavedAccess() : FunctionPass(ID) {
    initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override { return "Interleaved Access Pass"; }

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace.

PreservedAnalyses InterleavedAccessPass::run(Function &F,
                                             FunctionAnalysisManager &FAM) {
  auto *DT = &FAM.getResult<DominatorTreeAnalysis>(F);
  auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
  InterleavedAccessImpl Impl(DT, TLI);
  bool Changed = Impl.runOnFunction(F);

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

char InterleavedAccess::ID = 0;

bool InterleavedAccess::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC || !LowerInterleavedAccesses)
    return false;

  LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");

  Impl.DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &TM = TPC->getTM<TargetMachine>();
  Impl.TLI = TM.getSubtargetImpl(F)->getTargetLowering();
  Impl.MaxFactor = Impl.TLI->getMaxSupportedInterleaveFactor();

  return Impl.runOnFunction(F);
}

196 "Lower interleaved memory accesses to target specific intrinsics", false,
197 false)
200 "Lower interleaved memory accesses to target specific intrinsics", false,
201 false)
202
204 return new InterleavedAccess();
205}
206
/// Check if the mask is a DE-interleave mask for an interleaved load.
///
/// E.g. DE-interleave masks (Factor = 2) could be:
///     <0, 2, 4, 6>    (mask of index 0 to extract even elements)
///     <1, 3, 5, 7>    (mask of index 1 to extract odd elements)
static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
                               unsigned &Index, unsigned MaxFactor,
                               unsigned NumLoadElements) {
  if (Mask.size() < 2)
    return false;

  // Check potential Factors.
  for (Factor = 2; Factor <= MaxFactor; Factor++) {
    // Make sure we don't produce a load wider than the input load.
    if (Mask.size() * Factor > NumLoadElements)
      return false;
    if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index))
      return true;
  }

  return false;
}

/// Check if the mask can be used in an interleaved store.
///
/// It checks for a more general pattern than the RE-interleave mask.
/// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...>
/// E.g. For a Factor of 2 (LaneLen=4): <4, 32, 5, 33, 6, 34, 7, 35>
/// E.g. For a Factor of 3 (LaneLen=4): <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
/// E.g. For a Factor of 4 (LaneLen=2): <8, 2, 12, 4, 9, 3, 13, 5>
///
/// The particular case of an RE-interleave mask is:
/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
                               unsigned MaxFactor) {
  unsigned NumElts = SVI->getShuffleMask().size();
  if (NumElts < 4)
    return false;

  // Check potential Factors.
  for (Factor = 2; Factor <= MaxFactor; Factor++) {
    if (SVI->isInterleave(Factor))
      return true;
  }

  return false;
}

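// Return the mask operand of a masked/predicated memory intrinsic. As a
// reference, a sketch of the operand layouts these indices correspond to:
//   llvm.vp.load(ptr %ptr, <N x i1> %mask, i32 %evl)              ; operand 1
//   llvm.masked.load(ptr %ptr, i32 %align, <N x i1> %mask, ...)   ; operand 2
//   llvm.vp.store(%val, ptr %ptr, <N x i1> %mask, i32 %evl)       ; operand 2
//   llvm.masked.store(%val, ptr %ptr, i32 %align, <N x i1> %mask) ; operand 3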
static Value *getMaskOperand(IntrinsicInst *II) {
  switch (II->getIntrinsicID()) {
  default:
    llvm_unreachable("Unexpected intrinsic");
  case Intrinsic::vp_load:
    return II->getOperand(1);
  case Intrinsic::masked_load:
    return II->getOperand(2);
  case Intrinsic::vp_store:
    return II->getOperand(2);
  case Intrinsic::masked_store:
    return II->getOperand(3);
  }
}

// Return a pair of:
// (1) The corresponding deinterleaved mask, or nullptr if there is no valid
//     mask.
// (2) A gap mask: some wide masks effectively skip a certain field, and this
//     element is a mask in which inactive lanes represent fields that are
//     skipped (i.e. "gaps").
static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
                                         ElementCount LeafValueEC);
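//
// A worked example under the definitions above: with Factor = 3 and the wide
// mask <1,1,0, 1,1,0, 1,1,0, 1,1,0>, every third field is dead, so getMask
// returns the all-true leaf mask <1,1,1,1> together with the gap mask 0b011
// (bit 2 cleared).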

static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
                                         VectorType *LeafValueTy) {
  return getMask(WideMask, Factor, LeafValueTy->getElementCount());
}

bool InterleavedAccessImpl::lowerInterleavedLoad(
    Instruction *Load, SmallSetVector<Instruction *, 32> &DeadInsts) {
  if (isa<ScalableVectorType>(Load->getType()))
    return false;

  auto *LI = dyn_cast<LoadInst>(Load);
  auto *II = dyn_cast<IntrinsicInst>(Load);
  if (!LI && !II)
    return false;

  if (LI && !LI->isSimple())
    return false;

  // Check if all users of this load are shufflevectors. If we encounter any
  // users that are extractelement instructions or binary operators, we save
  // them to later check if they can be modified to extract from one of the
  // shufflevectors instead of the load.

  SmallVector<ShuffleVectorInst *, 4> Shuffles;
  SmallVector<ExtractElementInst *, 4> Extracts;
  // BinOpShuffles need to be handled a single time in case both operands of the
  // binop are the same load.
  SmallSetVector<ShuffleVectorInst *, 4> BinOpShuffles;

  for (auto *User : Load->users()) {
    auto *Extract = dyn_cast<ExtractElementInst>(User);
    if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
      Extracts.push_back(Extract);
      continue;
    }
    if (auto *BI = dyn_cast<BinaryOperator>(User)) {
      using namespace PatternMatch;
      if (!BI->user_empty() &&
          all_of(BI->users(), match_fn(m_Shuffle(m_Value(), m_Undef())))) {
        for (auto *SVI : BI->users())
          BinOpShuffles.insert(cast<ShuffleVectorInst>(SVI));
        continue;
      }
    }
    auto *SVI = dyn_cast<ShuffleVectorInst>(User);
    if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
      return false;

    Shuffles.push_back(SVI);
  }

  if (Shuffles.empty() && BinOpShuffles.empty())
    return false;

  unsigned Factor, Index;

  unsigned NumLoadElements =
      cast<FixedVectorType>(Load->getType())->getNumElements();
  auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0];
  // Check if the first shufflevector is DE-interleave shuffle.
  if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor,
                          NumLoadElements))
    return false;

  // Holds the corresponding index for each DE-interleave shuffle.
  SmallVector<unsigned, 4> Indices;

  VectorType *VecTy = cast<VectorType>(FirstSVI->getType());

  // Check if other shufflevectors are also DE-interleaved of the same type
  // and factor as the first shufflevector.
  for (auto *Shuffle : Shuffles) {
    if (Shuffle->getType() != VecTy)
      return false;
    if (!ShuffleVectorInst::isDeInterleaveMaskOfFactor(
            Shuffle->getShuffleMask(), Factor, Index))
      return false;

    assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
    Indices.push_back(Index);
  }
  for (auto *Shuffle : BinOpShuffles) {
    if (Shuffle->getType() != VecTy)
      return false;
    if (!ShuffleVectorInst::isDeInterleaveMaskOfFactor(
            Shuffle->getShuffleMask(), Factor, Index))
      return false;

    assert(Shuffle->getShuffleMask().size() <= NumLoadElements);

    if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == Load)
      Indices.push_back(Index);
    if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == Load)
      Indices.push_back(Index);
  }

  // Try and modify users of the load that are extractelement instructions to
  // use the shufflevector instructions instead of the load.
  if (!tryReplaceExtracts(Extracts, Shuffles))
    return false;

  bool BinOpShuffleChanged =
      replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, Load);

  Value *Mask = nullptr;
  auto GapMask = APInt::getAllOnes(Factor);
  if (LI) {
    LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n");
  } else {
    // Check mask operand. Handle both all-true/false and interleaved mask.
    std::tie(Mask, GapMask) = getMask(getMaskOperand(II), Factor, VecTy);
    if (!Mask)
      return false;

    LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load or masked.load: "
                      << *Load << "\n");
    LLVM_DEBUG(dbgs() << "IA: With nominal factor " << Factor
                      << " and actual factor " << GapMask.popcount() << "\n");
  }

  // Try to create target specific intrinsics to replace the load and
  // shuffles.
  if (!TLI->lowerInterleavedLoad(cast<Instruction>(Load), Mask, Shuffles,
                                 Indices, Factor, GapMask))
    // If Extracts is not empty, tryReplaceExtracts made changes earlier.
    return !Extracts.empty() || BinOpShuffleChanged;

  DeadInsts.insert_range(Shuffles);
  // We now have a target-specific load, so delete the old one.
  DeadInsts.insert(Load);
  return true;
}

bool InterleavedAccessImpl::replaceBinOpShuffles(
    ArrayRef<ShuffleVectorInst *> BinOpShuffles,
    SmallVectorImpl<ShuffleVectorInst *> &Shuffles, Instruction *Load) {
  for (auto *SVI : BinOpShuffles) {
    BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0));
    Type *BIOp0Ty = BI->getOperand(0)->getType();
    ArrayRef<int> Mask = SVI->getShuffleMask();
    assert(all_of(Mask, [&](int Idx) {
      return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
    }));

    BasicBlock::iterator insertPos = SVI->getIterator();
    auto *NewSVI1 =
        new ShuffleVectorInst(BI->getOperand(0), PoisonValue::get(BIOp0Ty),
                              Mask, SVI->getName(), insertPos);
    auto *NewSVI2 = new ShuffleVectorInst(
        BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask,
        SVI->getName(), insertPos);
    BinaryOperator *NewBI = BinaryOperator::CreateWithCopiedFlags(
        BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), insertPos);
    SVI->replaceAllUsesWith(NewBI);
    LLVM_DEBUG(dbgs() << "  Replaced: " << *BI << "\n  And     : " << *SVI
                      << "\n  With    : " << *NewSVI1 << "\n  And     : "
                      << *NewSVI2 << "\n  And     : " << *NewBI << "\n");
    RecursivelyDeleteTriviallyDeadInstructions(SVI);
    if (NewSVI1->getOperand(0) == Load)
      Shuffles.push_back(NewSVI1);
    if (NewSVI2->getOperand(0) == Load)
      Shuffles.push_back(NewSVI2);
  }

  return !BinOpShuffles.empty();
}

bool InterleavedAccessImpl::tryReplaceExtracts(
    ArrayRef<ExtractElementInst *> Extracts,
    ArrayRef<ShuffleVectorInst *> Shuffles) {
  // If there aren't any extractelement instructions to modify, there's nothing
  // to do.
  if (Extracts.empty())
    return true;

  // Maps extractelement instructions to vector-index pairs. The extractelement
  // instructions will be modified to use the new vector and index operands.
  DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap;

  for (auto *Extract : Extracts) {
    // The vector index that is extracted.
    auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
    auto Index = IndexOperand->getSExtValue();

    // Look for a suitable shufflevector instruction. The goal is to modify the
    // extractelement instruction (which uses an interleaved load) to use one
    // of the shufflevector instructions instead of the load.
    for (auto *Shuffle : Shuffles) {
      // If the shufflevector instruction doesn't dominate the extract, we
      // can't create a use of it.
      if (!DT->dominates(Shuffle, Extract))
        continue;

      // Inspect the indices of the shufflevector instruction. If the shuffle
      // selects the same index that is extracted, we can modify the
      // extractelement instruction.
      SmallVector<int, 4> Indices;
      Shuffle->getShuffleMask(Indices);
      for (unsigned I = 0; I < Indices.size(); ++I)
        if (Indices[I] == Index) {
          assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
                 "Vector operations do not match");
          ReplacementMap[Extract] = std::make_pair(Shuffle, I);
          break;
        }

      // If we found a suitable shufflevector instruction, stop looking.
      if (ReplacementMap.count(Extract))
        break;
    }

    // If we did not find a suitable shufflevector instruction, the
    // extractelement instruction cannot be modified, so we must give up.
    if (!ReplacementMap.count(Extract))
      return false;
  }

  // Finally, perform the replacements.
  IRBuilder<> Builder(Extracts[0]->getContext());
  for (auto &Replacement : ReplacementMap) {
    auto *Extract = Replacement.first;
    auto *Vector = Replacement.second.first;
    auto Index = Replacement.second.second;
    Builder.SetInsertPoint(Extract);
    Extract->replaceAllUsesWith(Builder.CreateExtractElement(Vector, Index));
    Extract->eraseFromParent();
  }

  return true;
}

bool InterleavedAccessImpl::lowerInterleavedStore(
    Instruction *Store, SmallSetVector<Instruction *, 32> &DeadInsts) {
  Value *StoredValue;
  auto *SI = dyn_cast<StoreInst>(Store);
  auto *II = dyn_cast<IntrinsicInst>(Store);
  if (SI) {
    if (!SI->isSimple())
      return false;
    StoredValue = SI->getValueOperand();
  } else {
    assert(II->getIntrinsicID() == Intrinsic::vp_store ||
           II->getIntrinsicID() == Intrinsic::masked_store);
    StoredValue = II->getArgOperand(0);
  }

  auto *SVI = dyn_cast<ShuffleVectorInst>(StoredValue);
  if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
    return false;

  unsigned NumStoredElements =
      cast<FixedVectorType>(SVI->getType())->getNumElements();
  // Check if the shufflevector is RE-interleave shuffle.
  unsigned Factor;
  if (!isReInterleaveMask(SVI, Factor, MaxFactor))
    return false;
  assert(NumStoredElements % Factor == 0 &&
         "number of stored element should be a multiple of Factor");

  Value *Mask = nullptr;
  auto GapMask = APInt::getAllOnes(Factor);
  if (SI) {
    LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n");
  } else {
    // Check mask operand. Handle both all-true/false and interleaved mask.
    unsigned LaneMaskLen = NumStoredElements / Factor;
    std::tie(Mask, GapMask) = getMask(getMaskOperand(II), Factor,
                                      ElementCount::getFixed(LaneMaskLen));
    if (!Mask)
      return false;

    LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store or masked.store: "
                      << *Store << "\n");
    LLVM_DEBUG(dbgs() << "IA: With nominal factor " << Factor
                      << " and actual factor " << GapMask.popcount() << "\n");
  }

  // Try to create target specific intrinsics to replace the store and
  // shuffle.
  if (!TLI->lowerInterleavedStore(Store, Mask, SVI, Factor, GapMask))
    return false;

  // Already have a new target specific interleaved store. Erase the old store.
  DeadInsts.insert(Store);
  DeadInsts.insert(SVI);
  return true;
}

// A wide mask <1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0> could be used to skip the
// last field in a factor-of-three interleaved store or deinterleaved load (in
// which case LeafMaskLen is 4). Such a (wide) mask is also known as a gap
// mask. This helper function tries to detect this pattern and clears the
// corresponding bits in \p GapMask; the number of remaining set bits is the
// actual factor we're accessing, which is 2 in this example.
static void getGapMask(const Constant &MaskConst, unsigned Factor,
                       unsigned LeafMaskLen, APInt &GapMask) {
  assert(GapMask.getBitWidth() == Factor);
  for (unsigned F = 0U; F < Factor; ++F) {
    bool AllZero = true;
    for (unsigned Idx = 0U; Idx < LeafMaskLen; ++Idx) {
      Constant *C = MaskConst.getAggregateElement(F + Idx * Factor);
      if (!C->isZeroValue()) {
        AllZero = false;
        break;
      }
    }
    // All mask bits on this field are zero: skip it.
    if (AllZero)
      GapMask.clearBit(F);
  }
}

static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
                                         ElementCount LeafValueEC) {
  auto GapMask = APInt::getAllOnes(Factor);

  if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
    if (unsigned F = getInterleaveIntrinsicFactor(IMI->getIntrinsicID());
        F && F == Factor) {
      Value *RefArg = nullptr;
      // Check if all the intrinsic arguments are the same, except those that
      // are zeros, which we mark as gaps in the gap mask.
      for (auto [Idx, Arg] : enumerate(IMI->args())) {
        if (auto *C = dyn_cast<Constant>(Arg); C && C->isZeroValue()) {
          GapMask.clearBit(Idx);
          continue;
        }

        if (!RefArg)
          RefArg = Arg;
        else if (RefArg != Arg)
          return {nullptr, GapMask};
      }

      // In very rare cases, all the intrinsic arguments might be zeros, in
      // which case we still want to return an all-zeros constant instead of
      // nullptr.
      return {RefArg ? RefArg : IMI->getArgOperand(0), GapMask};
    }
  }

  // Masks that are assembled from bitwise AND.
  if (auto *AndOp = dyn_cast<BinaryOperator>(WideMask);
      AndOp && AndOp->getOpcode() == Instruction::And) {
    auto [MaskLHS, GapMaskLHS] =
        getMask(AndOp->getOperand(0), Factor, LeafValueEC);
    auto [MaskRHS, GapMaskRHS] =
        getMask(AndOp->getOperand(1), Factor, LeafValueEC);
    if (!MaskLHS || !MaskRHS)
      return {nullptr, GapMask};
    // Using IRBuilder here so that any trivial constants could be folded right
    // away.
    return {IRBuilder<>(AndOp).CreateAnd(MaskLHS, MaskRHS),
            GapMaskLHS & GapMaskRHS};
  }

  if (auto *ConstMask = dyn_cast<Constant>(WideMask)) {
    if (auto *Splat = ConstMask->getSplatValue())
      // All-ones or all-zeros mask.
      return {ConstantVector::getSplat(LeafValueEC, Splat), GapMask};

    if (LeafValueEC.isFixed()) {
      unsigned LeafMaskLen = LeafValueEC.getFixedValue();
      // First, check if we use a gap mask to skip some of the factors / fields.
      getGapMask(*ConstMask, Factor, LeafMaskLen, GapMask);

      SmallVector<Constant *, 8> LeafMask(LeafMaskLen, nullptr);
      // If this is a fixed-length constant mask, each lane / leaf has to
      // use the same mask. This is done by checking if every group with Factor
      // number of elements in the interleaved mask has homogeneous values.
      for (unsigned Idx = 0U; Idx < LeafMaskLen * Factor; ++Idx) {
        if (!GapMask[Idx % Factor])
          continue;
        Constant *C = ConstMask->getAggregateElement(Idx);
        if (LeafMask[Idx / Factor] && LeafMask[Idx / Factor] != C)
          return {nullptr, GapMask};
        LeafMask[Idx / Factor] = C;
      }

      return {ConstantVector::get(LeafMask), GapMask};
    }
  }

  if (auto *SVI = dyn_cast<ShuffleVectorInst>(WideMask)) {
    Type *Op1Ty = SVI->getOperand(1)->getType();
    if (!isa<FixedVectorType>(Op1Ty))
      return {nullptr, GapMask};

    // Check that the shuffle mask is: a) an interleave, b) all of the same
    // set of the elements, and c) contained by the first source. (c) could
    // be relaxed if desired.
    unsigned NumSrcElts =
        cast<FixedVectorType>(SVI->getOperand(1)->getType())->getNumElements();
    SmallVector<unsigned> StartIndexes;
    if (ShuffleVectorInst::isInterleaveMask(SVI->getShuffleMask(), Factor,
                                            NumSrcElts * 2, StartIndexes) &&
        llvm::all_of(StartIndexes, [](unsigned Start) { return Start == 0; }) &&
        llvm::all_of(SVI->getShuffleMask(), [&NumSrcElts](int Idx) {
          return Idx < (int)NumSrcElts;
        })) {
      auto *LeafMaskTy =
          VectorType::get(Type::getInt1Ty(SVI->getContext()), LeafValueEC);
      IRBuilder<> Builder(SVI);
      return {Builder.CreateExtractVector(LeafMaskTy, SVI->getOperand(0),
                                          uint64_t(0)),
              GapMask};
    }
  }

  return {nullptr, GapMask};
}

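// The deinterleave path handles IR like the following (a sketch; the exact
// target lowering, e.g. ld2 on AArch64, is up to TLI):
//   %wide = load <8 x i32>, ptr %ptr
//   %di = call { <4 x i32>, <4 x i32> }
//              @llvm.vector.deinterleave2.v8i32(<8 x i32> %wide)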
bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
    IntrinsicInst *DI, SmallSetVector<Instruction *, 32> &DeadInsts) {
  Instruction *LoadedVal = dyn_cast<Instruction>(DI->getOperand(0));
  if (!LoadedVal || !LoadedVal->hasOneUse())
    return false;

  auto *LI = dyn_cast<LoadInst>(LoadedVal);
  auto *II = dyn_cast<IntrinsicInst>(LoadedVal);
  if (!LI && !II)
    return false;

  const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
  assert(Factor && "unexpected deinterleave intrinsic");

  Value *Mask = nullptr;
  if (LI) {
    if (!LI->isSimple())
      return false;

    LLVM_DEBUG(dbgs() << "IA: Found a load with deinterleave intrinsic " << *DI
                      << " and factor = " << Factor << "\n");
  } else {
    assert(II);
    if (II->getIntrinsicID() != Intrinsic::masked_load &&
        II->getIntrinsicID() != Intrinsic::vp_load)
      return false;

    // Check mask operand. Handle both all-true/false and interleaved mask.
    APInt GapMask(Factor, 0);
    std::tie(Mask, GapMask) =
        getMask(getMaskOperand(II), Factor, getDeinterleavedVectorType(DI));
    if (!Mask)
      return false;
    // Gap masks are not yet supported when deinterleaving with intrinsics.
    // Yet it is possible that we have already changed the IR, hence returning
    // true here.
    if (GapMask.popcount() != Factor)
      return true;

    LLVM_DEBUG(dbgs() << "IA: Found a vp.load or masked.load with deinterleave"
                      << " intrinsic " << *DI << " and factor = "
                      << Factor << "\n");
  }

  // Try and match this with target specific intrinsics.
  if (!TLI->lowerDeinterleaveIntrinsicToLoad(LoadedVal, Mask, DI))
    return false;

  DeadInsts.insert(DI);
  // We now have a target-specific load, so delete the old one.
  DeadInsts.insert(LoadedVal);
  return true;
}

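// The interleave path is the store-side dual; it handles IR like the
// following sketch:
//   %iv = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %v0,
//                                                       <4 x i32> %v1)
//   store <8 x i32> %iv, ptr %ptr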
bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
    IntrinsicInst *IntII, SmallSetVector<Instruction *, 32> &DeadInsts) {
  if (!IntII->hasOneUse())
    return false;
  Instruction *StoredBy = dyn_cast<Instruction>(IntII->user_back());
  if (!StoredBy)
    return false;
  auto *SI = dyn_cast<StoreInst>(StoredBy);
  auto *II = dyn_cast<IntrinsicInst>(StoredBy);
  if (!SI && !II)
    return false;

  SmallVector<Value *, 8> InterleaveValues(IntII->args());
  const unsigned Factor = getInterleaveIntrinsicFactor(IntII->getIntrinsicID());
  assert(Factor && "unexpected interleave intrinsic");

  Value *Mask = nullptr;
  if (II) {
    if (II->getIntrinsicID() != Intrinsic::masked_store &&
        II->getIntrinsicID() != Intrinsic::vp_store)
      return false;
    // Check mask operand. Handle both all-true/false and interleaved mask.
    APInt GapMask(Factor, 0);
    std::tie(Mask, GapMask) =
        getMask(getMaskOperand(II), Factor,
                cast<VectorType>(InterleaveValues[0]->getType()));
    if (!Mask)
      return false;
    // Gap masks are not yet supported when interleaving with intrinsics. Yet
    // it is possible that we have already changed the IR, hence returning true
    // here.
    if (GapMask.popcount() != Factor)
      return true;

    LLVM_DEBUG(dbgs() << "IA: Found a vp.store or masked.store with interleave"
                      << " intrinsic " << *IntII << " and factor = "
                      << Factor << "\n");
  } else {
    if (!SI->isSimple())
      return false;

    LLVM_DEBUG(dbgs() << "IA: Found a store with interleave intrinsic "
                      << *IntII << " and factor = " << Factor << "\n");
  }

  // Try and match this with target specific intrinsics.
  if (!TLI->lowerInterleaveIntrinsicToStore(StoredBy, Mask, InterleaveValues))
    return false;

  // We now have a target-specific store, so delete the old one.
  DeadInsts.insert(StoredBy);
  DeadInsts.insert(IntII);
  return true;
}

bool InterleavedAccessImpl::runOnFunction(Function &F) {
  // Holds dead instructions that will be erased later.
  SmallSetVector<Instruction *, 32> DeadInsts;
  bool Changed = false;

  using namespace PatternMatch;
  for (auto &I : instructions(F)) {
    if (match(&I, m_CombineOr(m_Load(m_Value()),
                              m_Intrinsic<Intrinsic::vp_load>())) ||
        match(&I, m_Intrinsic<Intrinsic::masked_load>()))
      Changed |= lowerInterleavedLoad(&I, DeadInsts);

    if (match(&I, m_CombineOr(m_Store(m_Value(), m_Value()),
                              m_Intrinsic<Intrinsic::vp_store>())) ||
        match(&I, m_Intrinsic<Intrinsic::masked_store>()))
      Changed |= lowerInterleavedStore(&I, DeadInsts);

    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      if (getDeinterleaveIntrinsicFactor(II->getIntrinsicID()))
        Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
      else if (getInterleaveIntrinsicFactor(II->getIntrinsicID()))
        Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
    }
  }

  for (auto *I : DeadInsts)
    I->eraseFromParent();

  return Changed;
}