LLVM 22.0.0git
Scalarizer.cpp
Go to the documentation of this file.
1//===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass converts vector operations into scalar operations (or, optionally,
10// operations on smaller vector widths), in order to expose optimization
11// opportunities on the individual scalar operations.
12// It is mainly intended for targets that do not have vector units, but it
13// may also be useful for revectorizing code to different vector widths.
14//
15//===----------------------------------------------------------------------===//
16
20#include "llvm/ADT/Twine.h"
23#include "llvm/IR/Argument.h"
24#include "llvm/IR/BasicBlock.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/Dominators.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/InstVisitor.h"
32#include "llvm/IR/InstrTypes.h"
33#include "llvm/IR/Instruction.h"
35#include "llvm/IR/Intrinsics.h"
36#include "llvm/IR/LLVMContext.h"
37#include "llvm/IR/Module.h"
38#include "llvm/IR/Type.h"
39#include "llvm/IR/Value.h"
43#include <cassert>
44#include <cstdint>
45#include <iterator>
46#include <map>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "scalarizer"
52
53namespace {
54
55BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
56 BasicBlock *BB = Itr->getParent();
57 if (isa<PHINode>(Itr))
58 Itr = BB->getFirstInsertionPt();
59 if (Itr != BB->end())
60 Itr = skipDebugIntrinsics(Itr);
61 return Itr;
62}
63
64// Used to store the scattered form of a vector.
65using ValueVector = SmallVector<Value *, 8>;
66
67// Used to map a vector Value and associated type to its scattered form.
68// The associated type is only non-null for pointer values that are "scattered"
69// when used as pointer operands to load or store.
70//
71// We use std::map because we want iterators to persist across insertion and
72// because the values are relatively large.
73using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;
74
75// Lists Instructions that have been replaced with scalar implementations,
76// along with a pointer to their scattered forms.
78
79struct VectorSplit {
80 // The type of the vector.
81 FixedVectorType *VecTy = nullptr;
82
83 // The number of elements packed in a fragment (other than the remainder).
84 unsigned NumPacked = 0;
85
86 // The number of fragments (scalars or smaller vectors) into which the vector
87 // shall be split.
88 unsigned NumFragments = 0;
89
90 // The type of each complete fragment.
91 Type *SplitTy = nullptr;
92
93 // The type of the remainder (last) fragment; null if all fragments are
94 // complete.
95 Type *RemainderTy = nullptr;
96
97 Type *getFragmentType(unsigned I) const {
98 return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy;
99 }
100};
101
102// Provides a very limited vector-like interface for lazily accessing one
103// component of a scattered vector or vector pointer.
104class Scatterer {
105public:
106 Scatterer() = default;
107
108 // Scatter V into Size components. If new instructions are needed,
109 // insert them before BBI in BB. If Cache is nonnull, use it to cache
110 // the results.
111 Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
112 const VectorSplit &VS, ValueVector *cachePtr = nullptr);
113
114 // Return component I, creating a new Value for it if necessary.
115 Value *operator[](unsigned I);
116
117 // Return the number of components.
118 unsigned size() const { return VS.NumFragments; }
119
120private:
121 BasicBlock *BB;
123 Value *V;
124 VectorSplit VS;
125 bool IsPointer;
126 ValueVector *CachePtr;
127 ValueVector Tmp;
128};
129
130// FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
131// called Name that compares X and Y in the same way as FCI.
132struct FCmpSplitter {
133 FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
134
135 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
136 const Twine &Name) const {
137 return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
138 }
139
140 FCmpInst &FCI;
141};
142
143// ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
144// called Name that compares X and Y in the same way as ICI.
145struct ICmpSplitter {
146 ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
147
148 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
149 const Twine &Name) const {
150 return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
151 }
152
153 ICmpInst &ICI;
154};
155
156// UnarySplitter(UO)(Builder, X, Name) uses Builder to create
157// a unary operator like UO called Name with operand X.
158struct UnarySplitter {
159 UnarySplitter(UnaryOperator &uo) : UO(uo) {}
160
161 Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
162 return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
163 }
164
165 UnaryOperator &UO;
166};
167
168// BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
169// a binary operator like BO called Name with operands X and Y.
170struct BinarySplitter {
171 BinarySplitter(BinaryOperator &bo) : BO(bo) {}
172
173 Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
174 const Twine &Name) const {
175 return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
176 }
177
178 BinaryOperator &BO;
179};
180
181// Information about a load or store that we're scalarizing.
182struct VectorLayout {
183 VectorLayout() = default;
184
185 // Return the alignment of fragment Frag.
186 Align getFragmentAlign(unsigned Frag) {
187 return commonAlignment(VecAlign, Frag * SplitSize);
188 }
189
190 // The split of the underlying vector type.
191 VectorSplit VS;
192
193 // The alignment of the vector.
194 Align VecAlign;
195
196 // The size of each (non-remainder) fragment in bytes.
197 uint64_t SplitSize = 0;
198};
199
200static bool isStructOfMatchingFixedVectors(Type *Ty) {
201 if (!isa<StructType>(Ty))
202 return false;
203 unsigned StructSize = Ty->getNumContainedTypes();
204 if (StructSize < 1)
205 return false;
206 FixedVectorType *VecTy = dyn_cast<FixedVectorType>(Ty->getContainedType(0));
207 if (!VecTy)
208 return false;
209 unsigned VecSize = VecTy->getNumElements();
210 for (unsigned I = 1; I < StructSize; I++) {
211 VecTy = dyn_cast<FixedVectorType>(Ty->getContainedType(I));
212 if (!VecTy || VecSize != VecTy->getNumElements())
213 return false;
214 }
215 return true;
216}
217
/// Concatenate the given fragments to a single vector value of the type
/// described in @p VS.
///
/// Scalar fragments are recombined with insertelement; vector fragments are
/// first widened to the full vector length and then merged pairwise with
/// shufflevector.
static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
                          const VectorSplit &VS, Twine Name) {
  unsigned NumElements = VS.VecTy->getNumElements();
  SmallVector<int> ExtendMask;
  SmallVector<int> InsertMask;

  if (VS.NumPacked > 1) {
    // Prepare the shufflevector masks once and re-use them for all
    // fragments.
    // ExtendMask widens a fragment to the full vector width, padding the
    // tail with poison (-1) lanes.
    ExtendMask.resize(NumElements, -1);
    for (unsigned I = 0; I < VS.NumPacked; ++I)
      ExtendMask[I] = I;

    // InsertMask starts as the identity and is temporarily patched below to
    // splice each fragment's lanes into the accumulated result.
    InsertMask.resize(NumElements);
    for (unsigned I = 0; I < NumElements; ++I)
      InsertMask[I] = I;
  }

  Value *Res = PoisonValue::get(VS.VecTy);
  for (unsigned I = 0; I < VS.NumFragments; ++I) {
    Value *Fragment = Fragments[I];

    // The last fragment may hold fewer elements than a full fragment.
    unsigned NumPacked = VS.NumPacked;
    if (I == VS.NumFragments - 1 && VS.RemainderTy) {
      if (auto *RemVecTy = dyn_cast<FixedVectorType>(VS.RemainderTy))
        NumPacked = RemVecTy->getNumElements();
      else
        NumPacked = 1;
    }

    if (NumPacked == 1) {
      Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
                                        Name + ".upto" + Twine(I));
    } else {
      // Widen the fragment to full vector length...
      Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
      if (I == 0) {
        Res = Fragment;
      } else {
        // ...then shuffle its lanes into position. The second operand's lanes
        // are addressed at NumElements + J in a two-operand shuffle mask.
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = NumElements + J;
        Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
                                          Name + ".upto" + Twine(I));
        // Restore the identity mapping so InsertMask can be reused.
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
      }
    }
  }

  return Res;
}
270
271class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
272public:
273 ScalarizerVisitor(DominatorTree *DT, const TargetTransformInfo *TTI,
275 : DT(DT), TTI(TTI),
276 ScalarizeVariableInsertExtract(Options.ScalarizeVariableInsertExtract),
277 ScalarizeLoadStore(Options.ScalarizeLoadStore),
278 ScalarizeMinBits(Options.ScalarizeMinBits) {}
279
280 bool visit(Function &F);
281
282 // InstVisitor methods. They return true if the instruction was scalarized,
283 // false if nothing changed.
284 bool visitInstruction(Instruction &I) { return false; }
285 bool visitSelectInst(SelectInst &SI);
286 bool visitICmpInst(ICmpInst &ICI);
287 bool visitFCmpInst(FCmpInst &FCI);
291 bool visitCastInst(CastInst &CI);
292 bool visitBitCastInst(BitCastInst &BCI);
297 bool visitPHINode(PHINode &PHI);
298 bool visitLoadInst(LoadInst &LI);
299 bool visitStoreInst(StoreInst &SI);
300 bool visitCallInst(CallInst &ICI);
301 bool visitFreezeInst(FreezeInst &FI);
302
303private:
304 Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS);
305 void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS);
306 void replaceUses(Instruction *Op, Value *CV);
307 bool canTransferMetadata(unsigned Kind);
308 void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
309 std::optional<VectorSplit> getVectorSplit(Type *Ty);
310 std::optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment,
311 const DataLayout &DL);
312 bool finish();
313
314 template<typename T> bool splitUnary(Instruction &, const T &);
315 template<typename T> bool splitBinary(Instruction &, const T &);
316
317 bool splitCall(CallInst &CI);
318
319 ScatterMap Scattered;
320 GatherList Gathered;
321 bool Scalarized;
322
323 SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs;
324
325 DominatorTree *DT;
327
328 const bool ScalarizeVariableInsertExtract;
329 const bool ScalarizeLoadStore;
330 const unsigned ScalarizeMinBits;
331};
332
333class ScalarizerLegacyPass : public FunctionPass {
334public:
335 static char ID;
337 ScalarizerLegacyPass() : FunctionPass(ID), Options() {}
338 ScalarizerLegacyPass(const ScalarizerPassOptions &Options);
339 bool runOnFunction(Function &F) override;
340 void getAnalysisUsage(AnalysisUsage &AU) const override;
341};
342
343} // end anonymous namespace
344
345ScalarizerLegacyPass::ScalarizerLegacyPass(const ScalarizerPassOptions &Options)
347
348void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
352}
353
354char ScalarizerLegacyPass::ID = 0;
355INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
356 "Scalarize vector operations", false, false)
358INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
359 "Scalarize vector operations", false, false)
360
361Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
362 const VectorSplit &VS, ValueVector *cachePtr)
363 : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
364 IsPointer = V->getType()->isPointerTy();
365 if (!CachePtr) {
366 Tmp.resize(VS.NumFragments, nullptr);
367 } else {
368 assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() ||
369 IsPointer) &&
370 "Inconsistent vector sizes");
371 if (VS.NumFragments > CachePtr->size())
372 CachePtr->resize(VS.NumFragments, nullptr);
373 }
374}
375
376// Return fragment Frag, creating a new Value for it if necessary.
377Value *Scatterer::operator[](unsigned Frag) {
378 ValueVector &CV = CachePtr ? *CachePtr : Tmp;
379 // Try to reuse a previous value.
380 if (CV[Frag])
381 return CV[Frag];
382 IRBuilder<> Builder(BB, BBI);
383 if (IsPointer) {
384 if (Frag == 0)
385 CV[Frag] = V;
386 else
387 CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag,
388 V->getName() + ".i" + Twine(Frag));
389 return CV[Frag];
390 }
391
392 Type *FragmentTy = VS.getFragmentType(Frag);
393
394 if (auto *VecTy = dyn_cast<FixedVectorType>(FragmentTy)) {
396 for (unsigned J = 0; J < VecTy->getNumElements(); ++J)
397 Mask.push_back(Frag * VS.NumPacked + J);
398 CV[Frag] =
399 Builder.CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask,
400 V->getName() + ".i" + Twine(Frag));
401 } else {
402 // Search through a chain of InsertElementInsts looking for element Frag.
403 // Record other elements in the cache. The new V is still suitable
404 // for all uncached indices.
405 while (true) {
406 InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
407 if (!Insert)
408 break;
409 ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
410 if (!Idx)
411 break;
412 unsigned J = Idx->getZExtValue();
413 V = Insert->getOperand(0);
414 if (Frag * VS.NumPacked == J) {
415 CV[Frag] = Insert->getOperand(1);
416 return CV[Frag];
417 }
418
419 if (VS.NumPacked == 1 && !CV[J]) {
420 // Only cache the first entry we find for each index we're not actively
421 // searching for. This prevents us from going too far up the chain and
422 // caching incorrect entries.
423 CV[J] = Insert->getOperand(1);
424 }
425 }
426 CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked,
427 V->getName() + ".i" + Twine(Frag));
428 }
429
430 return CV[Frag];
431}
432
433bool ScalarizerLegacyPass::runOnFunction(Function &F) {
434 if (skipFunction(F))
435 return false;
436
437 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
438 const TargetTransformInfo *TTI =
439 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
440 ScalarizerVisitor Impl(DT, TTI, Options);
441 return Impl.visit(F);
442}
443
445 return new ScalarizerLegacyPass(Options);
446}
447
// Scalarize every instruction in F, returning true if any change was made.
bool ScalarizerVisitor::visit(Function &F) {
  assert(Gathered.empty() && Scattered.empty());

  Scalarized = false;

  // To ensure we replace gathered components correctly we need to do an ordered
  // traversal of the basic blocks in the function.
  ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
  for (BasicBlock *BB : RPOT) {
    for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
      Instruction *I = &*II;
      bool Done = InstVisitor::visit(I);
      // Advance before any possible erase so the iterator stays valid.
      ++II;
      // Void instructions (e.g. stores) have no users to rewrite, so a
      // scalarized one can be deleted immediately; value-producing
      // instructions are deferred to finish().
      if (Done && I->getType()->isVoidTy()) {
        I->eraseFromParent();
        Scalarized = true;
      }
    }
  }
  // finish() rebuilds vector results that still have users and cleans up.
  return finish();
}
469
// Return a scattered form of V that can be accessed by Point. V must be a
// vector or a pointer to a vector.
//
// The insertion point of the returned Scatterer is chosen so that the
// fragments dominate all potential uses: the entry block for arguments,
// just after the defining instruction for instruction values, and at Point
// itself for anything else (e.g. constants).
Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
                                     const VectorSplit &VS) {
  if (Argument *VArg = dyn_cast<Argument>(V)) {
    // Put the scattered form of arguments in the entry block,
    // so that it can be used everywhere.
    Function *F = VArg->getParent();
    BasicBlock *BB = &F->getEntryBlock();
    return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]);
  }
  if (Instruction *VOp = dyn_cast<Instruction>(V)) {
    // When scalarizing PHI nodes we might try to examine/rewrite InsertElement
    // nodes in predecessors. If those predecessors are unreachable from entry,
    // then the IR in those blocks could have unexpected properties resulting in
    // infinite loops in Scatterer::operator[]. By simply treating values
    // originating from instructions in unreachable blocks as undef we do not
    // need to analyse them further.
    if (!DT->isReachableFromEntry(VOp->getParent()))
      return Scatterer(Point->getParent(), Point->getIterator(),
                       PoisonValue::get(V->getType()), VS);
    // Put the scattered form of an instruction directly after the
    // instruction, skipping over PHI nodes and debug intrinsics.
    BasicBlock *BB = VOp->getParent();
    return Scatterer(
        BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V, VS,
        &Scattered[{V, VS.SplitTy}]);
  }
  // In the fallback case, just put the scattered before Point and
  // keep the result local to Point.
  return Scatterer(Point->getParent(), Point->getIterator(), V, VS);
}
502
// Replace Op with the gathered form of the components in CV. Defer the
// deletion of Op and creation of the gathered form to the end of the pass,
// so that we can avoid creating the gathered form if all uses of Op are
// replaced with uses of CV.
void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV,
                               const VectorSplit &VS) {
  transferMetadataAndIRFlags(Op, CV);

  // If we already have a scattered form of Op (created from ExtractElements
  // of Op itself), replace them with the new form.
  ValueVector &SV = Scattered[{Op, VS.SplitTy}];
  if (!SV.empty()) {
    for (unsigned I = 0, E = SV.size(); I != E; ++I) {
      Value *V = SV[I];
      // Skip fragments that were never materialized or already match.
      if (V == nullptr || SV[I] == CV[I])
        continue;

      Instruction *Old = cast<Instruction>(V);
      // Keep the old fragment's name on the replacement where possible.
      if (isa<Instruction>(CV[I]))
        CV[I]->takeName(Old);
      Old->replaceAllUsesWith(CV[I]);
      PotentiallyDeadInstrs.emplace_back(Old);
    }
  }
  // Record the new scattered form; finish() will rebuild Op's vector value
  // from it only if Op still has users.
  SV = CV;
  Gathered.push_back(GatherList::value_type(Op, &SV));
}
530
531// Replace Op with CV and collect Op has a potentially dead instruction.
532void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) {
533 if (CV != Op) {
534 Op->replaceAllUsesWith(CV);
535 PotentiallyDeadInstrs.emplace_back(Op);
536 Scalarized = true;
537 }
538}
539
540// Return true if it is safe to transfer the given metadata tag from
541// vector to scalar instructions.
542bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
543 return (Tag == LLVMContext::MD_tbaa
544 || Tag == LLVMContext::MD_fpmath
545 || Tag == LLVMContext::MD_tbaa_struct
546 || Tag == LLVMContext::MD_invariant_load
547 || Tag == LLVMContext::MD_alias_scope
548 || Tag == LLVMContext::MD_noalias
549 || Tag == LLVMContext::MD_mem_parallel_loop_access
550 || Tag == LLVMContext::MD_access_group);
551}
552
553// Transfer metadata from Op to the instructions in CV if it is known
554// to be safe to do so.
555void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
556 const ValueVector &CV) {
558 Op->getAllMetadataOtherThanDebugLoc(MDs);
559 for (Value *V : CV) {
560 if (Instruction *New = dyn_cast<Instruction>(V)) {
561 for (const auto &MD : MDs)
562 if (canTransferMetadata(MD.first))
563 New->setMetadata(MD.first, MD.second);
564 New->copyIRFlags(Op);
565 if (Op->getDebugLoc() && !New->getDebugLoc())
566 New->setDebugLoc(Op->getDebugLoc());
567 }
568 }
569}
570
571// Determine how Ty is split, if at all.
572std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) {
573 VectorSplit Split;
574 Split.VecTy = dyn_cast<FixedVectorType>(Ty);
575 if (!Split.VecTy)
576 return {};
577
578 unsigned NumElems = Split.VecTy->getNumElements();
579 Type *ElemTy = Split.VecTy->getElementType();
580
581 if (NumElems == 1 || ElemTy->isPointerTy() ||
582 2 * ElemTy->getScalarSizeInBits() > ScalarizeMinBits) {
583 Split.NumPacked = 1;
584 Split.NumFragments = NumElems;
585 Split.SplitTy = ElemTy;
586 } else {
587 Split.NumPacked = ScalarizeMinBits / ElemTy->getScalarSizeInBits();
588 if (Split.NumPacked >= NumElems)
589 return {};
590
591 Split.NumFragments = divideCeil(NumElems, Split.NumPacked);
592 Split.SplitTy = FixedVectorType::get(ElemTy, Split.NumPacked);
593
594 unsigned RemainderElems = NumElems % Split.NumPacked;
595 if (RemainderElems > 1)
596 Split.RemainderTy = FixedVectorType::get(ElemTy, RemainderElems);
597 else if (RemainderElems == 1)
598 Split.RemainderTy = ElemTy;
599 }
600
601 return Split;
602}
603
604// Try to fill in Layout from Ty, returning true on success. Alignment is
605// the alignment of the vector, or std::nullopt if the ABI default should be
606// used.
607std::optional<VectorLayout>
608ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment,
609 const DataLayout &DL) {
610 std::optional<VectorSplit> VS = getVectorSplit(Ty);
611 if (!VS)
612 return {};
613
614 VectorLayout Layout;
615 Layout.VS = *VS;
616 // Check that we're dealing with full-byte fragments.
617 if (!DL.typeSizeEqualsStoreSize(VS->SplitTy) ||
618 (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(VS->RemainderTy)))
619 return {};
620 Layout.VecAlign = Alignment;
621 Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy);
622 return Layout;
623}
624
625// Scalarize one-operand instruction I, using Split(Builder, X, Name)
626// to create an instruction like I with operand X and name Name.
627template<typename Splitter>
628bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
629 std::optional<VectorSplit> VS = getVectorSplit(I.getType());
630 if (!VS)
631 return false;
632
633 std::optional<VectorSplit> OpVS;
634 if (I.getOperand(0)->getType() == I.getType()) {
635 OpVS = VS;
636 } else {
637 OpVS = getVectorSplit(I.getOperand(0)->getType());
638 if (!OpVS || VS->NumPacked != OpVS->NumPacked)
639 return false;
640 }
641
642 IRBuilder<> Builder(&I);
643 Scatterer Op = scatter(&I, I.getOperand(0), *OpVS);
644 assert(Op.size() == VS->NumFragments && "Mismatched unary operation");
645 ValueVector Res;
646 Res.resize(VS->NumFragments);
647 for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag)
648 Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag));
649 gather(&I, Res, *VS);
650 return true;
651}
652
653// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
654// to create an instruction like I with operands X and Y and name Name.
655template<typename Splitter>
656bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
657 std::optional<VectorSplit> VS = getVectorSplit(I.getType());
658 if (!VS)
659 return false;
660
661 std::optional<VectorSplit> OpVS;
662 if (I.getOperand(0)->getType() == I.getType()) {
663 OpVS = VS;
664 } else {
665 OpVS = getVectorSplit(I.getOperand(0)->getType());
666 if (!OpVS || VS->NumPacked != OpVS->NumPacked)
667 return false;
668 }
669
670 IRBuilder<> Builder(&I);
671 Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS);
672 Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS);
673 assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation");
674 assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation");
675 ValueVector Res;
676 Res.resize(VS->NumFragments);
677 for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) {
678 Value *Op0 = VOp0[Frag];
679 Value *Op1 = VOp1[Frag];
680 Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag));
681 }
682 gather(&I, Res, *VS);
683 return true;
684}
685
686/// If a call to a vector typed intrinsic function, split into a scalar call per
687/// element if possible for the intrinsic.
688bool ScalarizerVisitor::splitCall(CallInst &CI) {
689 Type *CallType = CI.getType();
690 bool AreAllVectorsOfMatchingSize = isStructOfMatchingFixedVectors(CallType);
691 std::optional<VectorSplit> VS;
692 if (AreAllVectorsOfMatchingSize)
693 VS = getVectorSplit(CallType->getContainedType(0));
694 else
695 VS = getVectorSplit(CallType);
696 if (!VS)
697 return false;
698
700 if (!F)
701 return false;
702
703 Intrinsic::ID ID = F->getIntrinsicID();
704
706 return false;
707
708 // unsigned NumElems = VT->getNumElements();
709 unsigned NumArgs = CI.arg_size();
710
711 ValueVector ScalarOperands(NumArgs);
712 SmallVector<Scatterer, 8> Scattered(NumArgs);
713 SmallVector<int> OverloadIdx(NumArgs, -1);
714
716 // Add return type if intrinsic is overloaded on it.
718 Tys.push_back(VS->SplitTy);
719
720 if (AreAllVectorsOfMatchingSize) {
721 for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) {
722 std::optional<VectorSplit> CurrVS =
723 getVectorSplit(cast<FixedVectorType>(CallType->getContainedType(I)));
724 // It is possible for VectorSplit.NumPacked >= NumElems. If that happens a
725 // VectorSplit is not returned and we will bailout of handling this call.
726 // The secondary bailout case is if NumPacked does not match. This can
727 // happen if ScalarizeMinBits is not set to the default. This means with
728 // certain ScalarizeMinBits intrinsics like frexp will only scalarize when
729 // the struct elements have the same bitness.
730 if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
731 return false;
733 Tys.push_back(CurrVS->SplitTy);
734 }
735 }
736 // Assumes that any vector type has the same number of elements as the return
737 // vector type, which is true for all current intrinsics.
738 for (unsigned I = 0; I != NumArgs; ++I) {
739 Value *OpI = CI.getOperand(I);
740 if ([[maybe_unused]] auto *OpVecTy =
741 dyn_cast<FixedVectorType>(OpI->getType())) {
742 assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements());
743 std::optional<VectorSplit> OpVS = getVectorSplit(OpI->getType());
744 if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
745 // The natural split of the operand doesn't match the result. This could
746 // happen if the vector elements are different and the ScalarizeMinBits
747 // option is used.
748 //
749 // We could in principle handle this case as well, at the cost of
750 // complicating the scattering machinery to support multiple scattering
751 // granularities for a single value.
752 return false;
753 }
754
755 Scattered[I] = scatter(&CI, OpI, *OpVS);
757 OverloadIdx[I] = Tys.size();
758 Tys.push_back(OpVS->SplitTy);
759 }
760 } else {
761 ScalarOperands[I] = OpI;
763 Tys.push_back(OpI->getType());
764 }
765 }
766
767 ValueVector Res(VS->NumFragments);
768 ValueVector ScalarCallOps(NumArgs);
769
770 Function *NewIntrin =
771 Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
772 IRBuilder<> Builder(&CI);
773
774 // Perform actual scalarization, taking care to preserve any scalar operands.
775 for (unsigned I = 0; I < VS->NumFragments; ++I) {
776 bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy;
777 ScalarCallOps.clear();
778
779 if (IsRemainder)
780 Tys[0] = VS->RemainderTy;
781
782 for (unsigned J = 0; J != NumArgs; ++J) {
784 ScalarCallOps.push_back(ScalarOperands[J]);
785 } else {
786 ScalarCallOps.push_back(Scattered[J][I]);
787 if (IsRemainder && OverloadIdx[J] >= 0)
788 Tys[OverloadIdx[J]] = Scattered[J][I]->getType();
789 }
790 }
791
792 if (IsRemainder)
793 NewIntrin = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
794
795 Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps,
796 CI.getName() + ".i" + Twine(I));
797 }
798
799 gather(&CI, Res, *VS);
800 return true;
801}
802
803bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
804 std::optional<VectorSplit> VS = getVectorSplit(SI.getType());
805 if (!VS)
806 return false;
807
808 std::optional<VectorSplit> CondVS;
809 if (isa<FixedVectorType>(SI.getCondition()->getType())) {
810 CondVS = getVectorSplit(SI.getCondition()->getType());
811 if (!CondVS || CondVS->NumPacked != VS->NumPacked) {
812 // This happens when ScalarizeMinBits is used.
813 return false;
814 }
815 }
816
817 IRBuilder<> Builder(&SI);
818 Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS);
819 Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS);
820 assert(VOp1.size() == VS->NumFragments && "Mismatched select");
821 assert(VOp2.size() == VS->NumFragments && "Mismatched select");
822 ValueVector Res;
823 Res.resize(VS->NumFragments);
824
825 if (CondVS) {
826 Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS);
827 assert(VOp0.size() == CondVS->NumFragments && "Mismatched select");
828 for (unsigned I = 0; I < VS->NumFragments; ++I) {
829 Value *Op0 = VOp0[I];
830 Value *Op1 = VOp1[I];
831 Value *Op2 = VOp2[I];
832 Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
833 SI.getName() + ".i" + Twine(I));
834 }
835 } else {
836 Value *Op0 = SI.getOperand(0);
837 for (unsigned I = 0; I < VS->NumFragments; ++I) {
838 Value *Op1 = VOp1[I];
839 Value *Op2 = VOp2[I];
840 Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
841 SI.getName() + ".i" + Twine(I));
842 }
843 }
844 gather(&SI, Res, *VS);
845 return true;
846}
847
848bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
849 return splitBinary(ICI, ICmpSplitter(ICI));
850}
851
852bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
853 return splitBinary(FCI, FCmpSplitter(FCI));
854}
855
856bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
857 return splitUnary(UO, UnarySplitter(UO));
858}
859
860bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
861 return splitBinary(BO, BinarySplitter(BO));
862}
863
864bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
865 std::optional<VectorSplit> VS = getVectorSplit(GEPI.getType());
866 if (!VS)
867 return false;
868
869 IRBuilder<> Builder(&GEPI);
870 unsigned NumIndices = GEPI.getNumIndices();
871
872 // The base pointer and indices might be scalar even if it's a vector GEP.
873 SmallVector<Value *, 8> ScalarOps{1 + NumIndices};
874 SmallVector<Scatterer, 8> ScatterOps{1 + NumIndices};
875
876 for (unsigned I = 0; I < 1 + NumIndices; ++I) {
877 if (auto *VecTy =
878 dyn_cast<FixedVectorType>(GEPI.getOperand(I)->getType())) {
879 std::optional<VectorSplit> OpVS = getVectorSplit(VecTy);
880 if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
881 // This can happen when ScalarizeMinBits is used.
882 return false;
883 }
884 ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS);
885 } else {
886 ScalarOps[I] = GEPI.getOperand(I);
887 }
888 }
889
890 ValueVector Res;
891 Res.resize(VS->NumFragments);
892 for (unsigned I = 0; I < VS->NumFragments; ++I) {
894 SplitOps.resize(1 + NumIndices);
895 for (unsigned J = 0; J < 1 + NumIndices; ++J) {
896 if (ScalarOps[J])
897 SplitOps[J] = ScalarOps[J];
898 else
899 SplitOps[J] = ScatterOps[J][I];
900 }
901 Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), SplitOps[0],
902 ArrayRef(SplitOps).drop_front(),
903 GEPI.getName() + ".i" + Twine(I));
904 if (GEPI.isInBounds())
905 if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
906 NewGEPI->setIsInBounds();
907 }
908 gather(&GEPI, Res, *VS);
909 return true;
910}
911
912bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
913 std::optional<VectorSplit> DestVS = getVectorSplit(CI.getDestTy());
914 if (!DestVS)
915 return false;
916
917 std::optional<VectorSplit> SrcVS = getVectorSplit(CI.getSrcTy());
918 if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked)
919 return false;
920
921 IRBuilder<> Builder(&CI);
922 Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS);
923 assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast");
924 ValueVector Res;
925 Res.resize(DestVS->NumFragments);
926 for (unsigned I = 0; I < DestVS->NumFragments; ++I)
927 Res[I] =
928 Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I),
929 CI.getName() + ".i" + Twine(I));
930 gather(&CI, Res, *DestVS);
931 return true;
932}
933
934bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
935 std::optional<VectorSplit> DstVS = getVectorSplit(BCI.getDestTy());
936 std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.getSrcTy());
937 if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)
938 return false;
939
940 const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();
941
942 // Vectors of pointers are always fully scalarized.
943 assert(!isPointerTy || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1));
944
945 IRBuilder<> Builder(&BCI);
946 Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS);
947 ValueVector Res;
948 Res.resize(DstVS->NumFragments);
949
950 unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
951 unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();
952
953 if (isPointerTy || DstSplitBits == SrcSplitBits) {
954 assert(DstVS->NumFragments == SrcVS->NumFragments);
955 for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
956 Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I),
957 BCI.getName() + ".i" + Twine(I));
958 }
959 } else if (SrcSplitBits % DstSplitBits == 0) {
960 // Convert each source fragment to the same-sized destination vector and
961 // then scatter the result to the destination.
962 VectorSplit MidVS;
963 MidVS.NumPacked = DstVS->NumPacked;
964 MidVS.NumFragments = SrcSplitBits / DstSplitBits;
965 MidVS.VecTy = FixedVectorType::get(DstVS->VecTy->getElementType(),
966 MidVS.NumPacked * MidVS.NumFragments);
967 MidVS.SplitTy = DstVS->SplitTy;
968
969 unsigned ResI = 0;
970 for (unsigned I = 0; I < SrcVS->NumFragments; ++I) {
971 Value *V = Op0[I];
972
973 // Look through any existing bitcasts before converting to <N x t2>.
974 // In the best case, the resulting conversion might be a no-op.
976 while ((VI = dyn_cast<Instruction>(V)) &&
977 VI->getOpcode() == Instruction::BitCast)
978 V = VI->getOperand(0);
979
980 V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast");
981
982 Scatterer Mid = scatter(&BCI, V, MidVS);
983 for (unsigned J = 0; J < MidVS.NumFragments; ++J)
984 Res[ResI++] = Mid[J];
985 }
986 } else if (DstSplitBits % SrcSplitBits == 0) {
987 // Gather enough source fragments to make up a destination fragment and
988 // then convert to the destination type.
989 VectorSplit MidVS;
990 MidVS.NumFragments = DstSplitBits / SrcSplitBits;
991 MidVS.NumPacked = SrcVS->NumPacked;
992 MidVS.VecTy = FixedVectorType::get(SrcVS->VecTy->getElementType(),
993 MidVS.NumPacked * MidVS.NumFragments);
994 MidVS.SplitTy = SrcVS->SplitTy;
995
996 unsigned SrcI = 0;
997 SmallVector<Value *, 8> ConcatOps;
998 ConcatOps.resize(MidVS.NumFragments);
999 for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
1000 for (unsigned J = 0; J < MidVS.NumFragments; ++J)
1001 ConcatOps[J] = Op0[SrcI++];
1002 Value *V = concatenate(Builder, ConcatOps, MidVS,
1003 BCI.getName() + ".i" + Twine(I));
1004 Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I),
1005 BCI.getName() + ".i" + Twine(I));
1006 }
1007 } else {
1008 return false;
1009 }
1010
1011 gather(&BCI, Res, *DstVS);
1012 return true;
1013}
1014
1015bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
1016 std::optional<VectorSplit> VS = getVectorSplit(IEI.getType());
1017 if (!VS)
1018 return false;
1019
1020 IRBuilder<> Builder(&IEI);
1021 Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS);
1022 Value *NewElt = IEI.getOperand(1);
1023 Value *InsIdx = IEI.getOperand(2);
1024
1025 ValueVector Res;
1026 Res.resize(VS->NumFragments);
1027
1028 if (auto *CI = dyn_cast<ConstantInt>(InsIdx)) {
1029 unsigned Idx = CI->getZExtValue();
1030 unsigned Fragment = Idx / VS->NumPacked;
1031 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1032 if (I == Fragment) {
1033 bool IsPacked = VS->NumPacked > 1;
1034 if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
1035 !VS->RemainderTy->isVectorTy())
1036 IsPacked = false;
1037 if (IsPacked) {
1038 Res[I] =
1039 Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked);
1040 } else {
1041 Res[I] = NewElt;
1042 }
1043 } else {
1044 Res[I] = Op0[I];
1045 }
1046 }
1047 } else {
1048 // Never split a variable insertelement that isn't fully scalarized.
1049 if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
1050 return false;
1051
1052 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1053 Value *ShouldReplace =
1054 Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
1055 InsIdx->getName() + ".is." + Twine(I));
1056 Value *OldElt = Op0[I];
1057 Res[I] = Builder.CreateSelect(ShouldReplace, NewElt, OldElt,
1058 IEI.getName() + ".i" + Twine(I));
1059 }
1060 }
1061
1062 gather(&IEI, Res, *VS);
1063 return true;
1064}
1065
1066bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
1067 Value *Op = EVI.getOperand(0);
1068 Type *OpTy = Op->getType();
1069 ValueVector Res;
1070 if (!isStructOfMatchingFixedVectors(OpTy))
1071 return false;
1072 if (CallInst *CI = dyn_cast<CallInst>(Op)) {
1073 Function *F = CI->getCalledFunction();
1074 if (!F)
1075 return false;
1076 Intrinsic::ID ID = F->getIntrinsicID();
1078 return false;
1079 // Note: Fall through means Operand is a`CallInst` and it is defined in
1080 // `isTriviallyScalarizable`.
1081 } else
1082 return false;
1083 Type *VecType = cast<FixedVectorType>(OpTy->getContainedType(0));
1084 std::optional<VectorSplit> VS = getVectorSplit(VecType);
1085 if (!VS)
1086 return false;
1087 for (unsigned I = 1; I < OpTy->getNumContainedTypes(); I++) {
1088 std::optional<VectorSplit> CurrVS =
1089 getVectorSplit(cast<FixedVectorType>(OpTy->getContainedType(I)));
1090 // It is possible for VectorSplit.NumPacked >= NumElems. If that happens a
1091 // VectorSplit is not returned and we will bailout of handling this call.
1092 // The secondary bailout case is if NumPacked does not match. This can
1093 // happen if ScalarizeMinBits is not set to the default. This means with
1094 // certain ScalarizeMinBits intrinsics like frexp will only scalarize when
1095 // the struct elements have the same bitness.
1096 if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
1097 return false;
1098 }
1099 IRBuilder<> Builder(&EVI);
1100 Scatterer Op0 = scatter(&EVI, Op, *VS);
1101 assert(!EVI.getIndices().empty() && "Make sure an index exists");
1102 // Note for our use case we only care about the top level index.
1103 unsigned Index = EVI.getIndices()[0];
1104 for (unsigned OpIdx = 0; OpIdx < Op0.size(); ++OpIdx) {
1105 Value *ResElem = Builder.CreateExtractValue(
1106 Op0[OpIdx], Index, EVI.getName() + ".elem" + Twine(Index));
1107 Res.push_back(ResElem);
1108 }
1109
1110 Type *ActualVecType = cast<FixedVectorType>(OpTy->getContainedType(Index));
1111 std::optional<VectorSplit> AVS = getVectorSplit(ActualVecType);
1112 gather(&EVI, Res, *AVS);
1113 return true;
1114}
1115
1116bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
1117 std::optional<VectorSplit> VS = getVectorSplit(EEI.getOperand(0)->getType());
1118 if (!VS)
1119 return false;
1120
1121 IRBuilder<> Builder(&EEI);
1122 Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS);
1123 Value *ExtIdx = EEI.getOperand(1);
1124
1125 if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
1126 unsigned Idx = CI->getZExtValue();
1127 unsigned Fragment = Idx / VS->NumPacked;
1128 Value *Res = Op0[Fragment];
1129 bool IsPacked = VS->NumPacked > 1;
1130 if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
1131 !VS->RemainderTy->isVectorTy())
1132 IsPacked = false;
1133 if (IsPacked)
1134 Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked);
1135 replaceUses(&EEI, Res);
1136 return true;
1137 }
1138
1139 // Never split a variable extractelement that isn't fully scalarized.
1140 if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
1141 return false;
1142
1143 Value *Res = PoisonValue::get(VS->VecTy->getElementType());
1144 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1145 Value *ShouldExtract =
1146 Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I),
1147 ExtIdx->getName() + ".is." + Twine(I));
1148 Value *Elt = Op0[I];
1149 Res = Builder.CreateSelect(ShouldExtract, Elt, Res,
1150 EEI.getName() + ".upto" + Twine(I));
1151 }
1152 replaceUses(&EEI, Res);
1153 return true;
1154}
1155
1156bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
1157 std::optional<VectorSplit> VS = getVectorSplit(SVI.getType());
1158 std::optional<VectorSplit> VSOp =
1159 getVectorSplit(SVI.getOperand(0)->getType());
1160 if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1)
1161 return false;
1162
1163 Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp);
1164 Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp);
1165 ValueVector Res;
1166 Res.resize(VS->NumFragments);
1167
1168 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1169 int Selector = SVI.getMaskValue(I);
1170 if (Selector < 0)
1171 Res[I] = PoisonValue::get(VS->VecTy->getElementType());
1172 else if (unsigned(Selector) < Op0.size())
1173 Res[I] = Op0[Selector];
1174 else
1175 Res[I] = Op1[Selector - Op0.size()];
1176 }
1177 gather(&SVI, Res, *VS);
1178 return true;
1179}
1180
1181bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
1182 std::optional<VectorSplit> VS = getVectorSplit(PHI.getType());
1183 if (!VS)
1184 return false;
1185
1186 IRBuilder<> Builder(&PHI);
1187 ValueVector Res;
1188 Res.resize(VS->NumFragments);
1189
1190 unsigned NumOps = PHI.getNumOperands();
1191 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1192 Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps,
1193 PHI.getName() + ".i" + Twine(I));
1194 }
1195
1196 for (unsigned I = 0; I < NumOps; ++I) {
1197 Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS);
1198 BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
1199 for (unsigned J = 0; J < VS->NumFragments; ++J)
1200 cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
1201 }
1202 gather(&PHI, Res, *VS);
1203 return true;
1204}
1205
1206bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
1207 if (!ScalarizeLoadStore)
1208 return false;
1209 if (!LI.isSimple())
1210 return false;
1211
1212 std::optional<VectorLayout> Layout = getVectorLayout(
1213 LI.getType(), LI.getAlign(), LI.getDataLayout());
1214 if (!Layout)
1215 return false;
1216
1217 IRBuilder<> Builder(&LI);
1218 Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), Layout->VS);
1219 ValueVector Res;
1220 Res.resize(Layout->VS.NumFragments);
1221
1222 for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
1223 Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I],
1224 Align(Layout->getFragmentAlign(I)),
1225 LI.getName() + ".i" + Twine(I));
1226 }
1227 gather(&LI, Res, Layout->VS);
1228 return true;
1229}
1230
1231bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
1232 if (!ScalarizeLoadStore)
1233 return false;
1234 if (!SI.isSimple())
1235 return false;
1236
1237 Value *FullValue = SI.getValueOperand();
1238 std::optional<VectorLayout> Layout = getVectorLayout(
1239 FullValue->getType(), SI.getAlign(), SI.getDataLayout());
1240 if (!Layout)
1241 return false;
1242
1243 IRBuilder<> Builder(&SI);
1244 Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS);
1245 Scatterer VVal = scatter(&SI, FullValue, Layout->VS);
1246
1247 ValueVector Stores;
1248 Stores.resize(Layout->VS.NumFragments);
1249 for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
1250 Value *Val = VVal[I];
1251 Value *Ptr = VPtr[I];
1252 Stores[I] =
1253 Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I));
1254 }
1255 transferMetadataAndIRFlags(&SI, Stores);
1256 return true;
1257}
1258
// Calls are handled by the shared call-splitting logic in splitCall.
bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
  return splitCall(CI);
}
1262
1263bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) {
1264 return splitUnary(FI, [](IRBuilder<> &Builder, Value *Op, const Twine &Name) {
1265 return Builder.CreateFreeze(Op, Name);
1266 });
1267}
1268
1269// Delete the instructions that we scalarized. If a full vector result
1270// is still needed, recreate it using InsertElements.
1271bool ScalarizerVisitor::finish() {
1272 // The presence of data in Gathered or Scattered indicates changes
1273 // made to the Function.
1274 if (Gathered.empty() && Scattered.empty() && !Scalarized)
1275 return false;
1276 for (const auto &GMI : Gathered) {
1277 Instruction *Op = GMI.first;
1278 ValueVector &CV = *GMI.second;
1279 if (!Op->use_empty()) {
1280 // The value is still needed, so recreate it using a series of
1281 // insertelements and/or shufflevectors.
1282 Value *Res;
1283 if (auto *Ty = dyn_cast<FixedVectorType>(Op->getType())) {
1284 BasicBlock *BB = Op->getParent();
1285 IRBuilder<> Builder(Op);
1286 if (isa<PHINode>(Op))
1287 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
1288
1289 VectorSplit VS = *getVectorSplit(Ty);
1290 assert(VS.NumFragments == CV.size());
1291
1292 Res = concatenate(Builder, CV, VS, Op->getName());
1293
1294 Res->takeName(Op);
1295 } else if (auto *Ty = dyn_cast<StructType>(Op->getType())) {
1296 BasicBlock *BB = Op->getParent();
1297 IRBuilder<> Builder(Op);
1298 if (isa<PHINode>(Op))
1299 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
1300
1301 // Iterate over each element in the struct
1302 unsigned NumOfStructElements = Ty->getNumElements();
1303 SmallVector<ValueVector, 4> ElemCV(NumOfStructElements);
1304 for (unsigned I = 0; I < NumOfStructElements; ++I) {
1305 for (auto *CVelem : CV) {
1306 Value *Elem = Builder.CreateExtractValue(
1307 CVelem, I, Op->getName() + ".elem" + Twine(I));
1308 ElemCV[I].push_back(Elem);
1309 }
1310 }
1311 Res = PoisonValue::get(Ty);
1312 for (unsigned I = 0; I < NumOfStructElements; ++I) {
1313 Type *ElemTy = Ty->getElementType(I);
1314 assert(isa<FixedVectorType>(ElemTy) &&
1315 "Only Structs of all FixedVectorType supported");
1316 VectorSplit VS = *getVectorSplit(ElemTy);
1317 assert(VS.NumFragments == CV.size());
1318
1319 Value *ConcatenatedVector =
1320 concatenate(Builder, ElemCV[I], VS, Op->getName());
1321 Res = Builder.CreateInsertValue(Res, ConcatenatedVector, I,
1322 Op->getName() + ".insert");
1323 }
1324 } else {
1325 assert(CV.size() == 1 && Op->getType() == CV[0]->getType());
1326 Res = CV[0];
1327 if (Op == Res)
1328 continue;
1329 }
1330 Op->replaceAllUsesWith(Res);
1331 }
1332 PotentiallyDeadInstrs.emplace_back(Op);
1333 }
1334 Gathered.clear();
1335 Scattered.clear();
1336 Scalarized = false;
1337
1339
1340 return true;
1341}
1342
1346 ScalarizerVisitor Impl(DT, TTI, Options);
1347 bool Changed = Impl.visit(F);
1350 return Changed ? PA : PreservedAnalyses::all();
1351}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
Module.h This file contains the declarations for the Module class.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
Scalarize vector operations
Definition: Scalarizer.cpp:359
scalarizer
Definition: Scalarizer.cpp:358
This pass converts vector operations into scalar operations (or, optionally, operations on smaller ve...
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:142
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator end()
Definition: BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:459
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:393
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
This class represents a no-op cast from one type to another.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1348
unsigned arg_size() const
Definition: InstrTypes.h:1290
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:448
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:617
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:612
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:619
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:284
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:322
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:334
This instruction extracts a single (scalar) element from a VectorType value.
This instruction extracts a struct member or array element value from an aggregate value.
ArrayRef< unsigned > getIndices() const
This instruction compares its operands according to the predicate given to the constructor.
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:592
unsigned getNumElements() const
Definition: DerivedTypes.h:635
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:803
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:949
LLVM_ABI bool isInBounds() const
Determine whether the GEP has the inbounds flag.
Type * getSourceElementType() const
unsigned getNumIndices() const
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2571
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2625
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2618
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2637
Value * CreateUnOp(Instruction::UnaryOps Opc, Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1809
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2593
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1708
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:207
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2439
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
Base class for instruction visitors.
Definition: InstVisitor.h:78
RetTy visitFreezeInst(FreezeInst &I)
Definition: InstVisitor.h:201
RetTy visitFCmpInst(FCmpInst &I)
Definition: InstVisitor.h:167
RetTy visitExtractElementInst(ExtractElementInst &I)
Definition: InstVisitor.h:192
RetTy visitShuffleVectorInst(ShuffleVectorInst &I)
Definition: InstVisitor.h:194
RetTy visitBitCastInst(BitCastInst &I)
Definition: InstVisitor.h:188
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
RetTy visitPHINode(PHINode &I)
Definition: InstVisitor.h:175
RetTy visitExtractValueInst(ExtractValueInst &I)
Definition: InstVisitor.h:195
RetTy visitUnaryOperator(UnaryOperator &I)
Definition: InstVisitor.h:255
RetTy visitStoreInst(StoreInst &I)
Definition: InstVisitor.h:170
RetTy visitInsertElementInst(InsertElementInst &I)
Definition: InstVisitor.h:193
RetTy visitBinaryOperator(BinaryOperator &I)
Definition: InstVisitor.h:256
RetTy visitICmpInst(ICmpInst &I)
Definition: InstVisitor.h:166
RetTy visitCallInst(CallInst &I)
Definition: InstVisitor.h:215
RetTy visitCastInst(CastInst &I)
Definition: InstVisitor.h:254
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:190
RetTy visitGetElementPtrInst(GetElementPtrInst &I)
Definition: InstVisitor.h:174
void visitInstruction(Instruction &I)
Definition: InstVisitor.h:275
RetTy visitLoadInst(LoadInst &I)
Definition: InstVisitor.h:169
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:86
An instruction for reading from memory.
Definition: Instructions.h:180
Value * getPointerOperand()
Definition: Instructions.h:259
bool isSimple() const
Definition: Instructions.h:251
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:215
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:112
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1885
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition: Analysis.h:132
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This class represents the LLVM 'select' instruction.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
size_t size() const
Definition: SmallVector.h:79
void resize(size_type N)
Definition: SmallVector.h:639
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
An instruction for storing to memory.
Definition: Instructions.h:296
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:267
unsigned getNumContainedTypes() const
Return the number of types in the derived type.
Definition: Type.h:387
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:381
Value * getOperand(unsigned i) const
Definition: User.h:232
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:546
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:396
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:751
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI bool isTriviallyScalarizable(Intrinsic::ID ID, const TargetTransformInfo *TTI)
Identify if the intrinsic is trivially scalarizable.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1702
@ Done
Definition: Threading.h:60
LLVM_ABI BasicBlock::iterator skipDebugIntrinsics(BasicBlock::iterator It)
Advance It while it points to a debug instruction and return the result.
Definition: BasicBlock.cpp:682
bool isPointerTy(const Type *T)
Definition: SPIRVUtils.h:288
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:399
LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
TargetTransformInfo TTI
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
LLVM_ABI FunctionPass * createScalarizerPass(const ScalarizerPassOptions &Options=ScalarizerPassOptions())
Create a legacy pass manager instance of the Scalarizer pass.
Definition: Scalarizer.cpp:444
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
Definition: Local.cpp:548
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
LLVM_ABI bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39