#define DEBUG_TYPE "vplan-slp"
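
// From VPInterleavedAccessInfo::visitRegion: each block of the region is
// visited in turn, translating IR interleave groups into VPInstruction groups.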
    visitBlock(Base, Old2New, IAI);
void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
                                         InterleavedAccessInfo &IAI) {
      if (isa<VPWidenPHIRecipe>(&VPI))
        continue;
      auto *VPInst = cast<VPInstruction>(&VPI);
      auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
      auto NewIGIter = Old2New.find(IG);
      if (NewIGIter == Old2New.end())
        Old2New[IG] = new InterleaveGroup<VPInstruction>(
            IG->getFactor(), IG->isReverse(), IG->getAlign());

      if (Inst == IG->getInsertPos())
        Old2New[IG]->setInsertPos(VPInst);

      InterleaveGroupMap[VPInst] = Old2New[IG];
      InterleaveGroupMap[VPInst]->insertMember(
          VPInst, IG->getIndex(Inst),
          Align(IG->isReverse() ? (-1) * int(IG->getFactor())
                                : IG->getFactor()));
    visitRegion(Region, Old2New, IAI);
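
// VPlanSlp::markFailed: record that the bundle cannot be trivially SLP'ized,
// so the resulting graph is no longer completely SLP.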
  CompletelySLP = false;
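
// VPlanSlp::addCombined: remember the combined VPInstruction created for an
// operand bundle and track the widest bundle width (in bits) seen so far.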
        return cast<VPInstruction>(V)->getUnderlyingInstr();
    unsigned BundleSize = 0;
      Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
      assert(!T->isVectorTy() && "Only scalar types supported for now");
      BundleSize += T->getScalarSizeInBits();
    WidestBundleBits = std::max(WidestBundleBits, BundleSize);

  auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
  assert(Res.second &&
         "Already created a combined instruction for the operand bundle");
        return Op && isa<VPInstruction>(Op) &&
               cast<VPInstruction>(Op)->getUnderlyingInstr();
    LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");
  const Instruction *OriginalInstr =
      cast<VPInstruction>(Operands[0])->getUnderlyingInstr();
  unsigned Opcode = OriginalInstr->getOpcode();
  unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits();
        const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr();
        return I->getOpcode() == Opcode &&
               I->getType()->getPrimitiveSizeInBits() == Width;
        return cast<VPInstruction>(Op)->getParent() != &this->BB;

  if (any_of(Operands,
             [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {
    LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");
  if (Opcode == Instruction::Load) {
    unsigned LoadsSeen = 0;
    VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent();
    for (auto &I : *Parent) {
      auto *VPI = dyn_cast<VPInstruction>(&I);
      if (VPI->getOpcode() == Instruction::Load &&
          is_contained(Operands, VPI))
        LoadsSeen++;
      if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
        LLVM_DEBUG(
            dbgs() << "VPSLP: instruction modifying memory between loads\n");
        return false;
      }
    }
    if (!all_of(Operands, [](VPValue *Op) {
          return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
              ->isSimple();
        })) {
      LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");
  if (Opcode == Instruction::Store)
    if (!all_of(Operands, [](VPValue *Op) {
          return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
              ->isSimple();
        })) {
      LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");
static SmallVector<VPValue *, 4> getOperands(ArrayRef<VPValue *> Values,
                                             unsigned OperandIndex) {
  SmallVector<VPValue *, 4> Operands;
  for (VPValue *V : Values) {
    auto *U = cast<VPInstruction>(V);
    Operands.push_back(U->getOperand(OperandIndex));
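
// A bundle is treated as commutative when the common opcode of its values is
// commutative.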
  return Instruction::isCommutative(
      cast<VPInstruction>(Values[0])->getOpcode());
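
// Build one operand bundle per operand index; loads terminate the tree and
// stores only contribute their stored operand.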
  auto *VPI = cast<VPInstruction>(Values[0]);

  switch (VPI->getOpcode()) {
  case Instruction::Load:
    llvm_unreachable("Loads terminate a tree, no need to get operands");
  case Instruction::Store:
    Result.push_back(getOperands(Values, 0));
    break;
  default:
    for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)
      Result.push_back(getOperands(Values, I));
  }
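
// getOpcode: returns the common opcode of Values, or no value if the opcodes
// do not all agree.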
  unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();
        return cast<VPInstruction>(V)->getOpcode() != Opcode;
  if (A->getOpcode() != B->getOpcode())
    return false;

  if (A->getOpcode() != Instruction::Load &&
      A->getOpcode() != Instruction::Store)
    return true;

  auto *GA = IAI.getInterleaveGroup(A);
  auto *GB = IAI.getInterleaveGroup(B);
  return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
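
// getLAScore: look-ahead score for V1 and V2, summed over all pairs of their
// operands down to MaxLevel (Listing 7 of the look-ahead SLP paper).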
  auto *I1 = dyn_cast<VPInstruction>(V1);
  auto *I2 = dyn_cast<VPInstruction>(V2);

  unsigned Score = 0;
  for (unsigned I = 0, EV1 = I1->getNumOperands(); I < EV1; ++I)
    for (unsigned J = 0, EV2 = I2->getNumOperands(); J < EV2; ++J)
      Score +=
          getLAScore(I1->getOperand(I), I2->getOperand(J), MaxLevel - 1, IAI);
  return Score;
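
// VPlanSlp::getBest: pick the candidate that is consecutive with (or matches)
// Last, using look-ahead scores to break ties between several candidates.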
std::pair<VPlanSlp::OpMode, VPValue *>
VPlanSlp::getBest(OpMode Mode, VPValue *Last,
                  SmallPtrSetImpl<VPValue *> &Candidates,
                  VPInterleavedAccessInfo &IAI) {
  assert((Mode == OpMode::Load || Mode == OpMode::Opcode) &&
         "Currently we only handle load and commutative opcodes");
                    << *cast<VPInstruction>(Last)->getUnderlyingInstr() << " ");
  for (auto *Candidate : Candidates) {
    auto *LastI = cast<VPInstruction>(Last);
    auto *CandidateI = cast<VPInstruction>(Candidate);
    if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) {
      LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
                        << " ");
      BestCandidates.push_back(Candidate);
    }
  }

  if (BestCandidates.empty())
    return {OpMode::Failed, nullptr};

  if (BestCandidates.size() == 1)
    return {Mode, BestCandidates[0]};
  unsigned BestScore = 0;
    unsigned PrevScore = ~0u;
    for (auto *Candidate : BestCandidates) {
      if (PrevScore == ~0u)
        PrevScore = Score;
      if (PrevScore != Score)
        AllSame = false;
      if (Score > BestScore) {
        BestScore = Score;
        Best = Candidate;
      }
                    << *cast<VPInstruction>(Best)->getUnderlyingInstr()
  Candidates.erase(Best);
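
// VPlanSlp::reorderMultiNodeOps: reorder multinode operand bundles lane by
// lane, choosing the best candidate value for each operand in every lane.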
  for (auto &Operands : MultiNodeOps) {
    if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==
        Instruction::Load)
      Mode.push_back(OpMode::Load);
    else
      Mode.push_back(OpMode::Opcode);
  for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {
    LLVM_DEBUG(dbgs() << " Finding best value for lane " << Lane << "\n");
    for (auto Ops : MultiNodeOps) {
      LLVM_DEBUG(
          dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
                 << " ");
      Candidates.insert(Ops.second[Lane]);
    for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {
      if (Mode[Op] == OpMode::Failed)
        continue;

      std::pair<OpMode, VPValue *> Res =
          getBest(Mode[Op], Last, Candidates, IAI);
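
// Debug helper: print the underlying instruction of each value in a bundle,
// or "nullptr" if a value has none.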
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  for (auto *Op : Values) {
    if (auto *VPInstr = cast_or_null<VPInstruction>(Op))
      if (auto *Instr = VPInstr->getUnderlyingInstr()) {
        dbgs() << *Instr << " | ";
        continue;
      }
    dbgs() << " nullptr | ";
  auto I = BundleToCombined.find(to_vector<4>(Values));
  if (I != BundleToCombined.end()) {
    for (auto *V : Values) {
      auto UI = V->user_begin();
      auto *FirstUser = *UI++;
      while (UI != V->user_end()) {
        assert(*UI == FirstUser && "Currently we only support SLP trees.");
    dbgs() << "buildGraph: ";

  if (!areVectorizable(Values))
    return markFailed();

  unsigned ValuesOpcode = *getOpcode(Values);
    bool MultiNodeRoot = !MultiNodeActive;
    MultiNodeActive = true;
        dbgs() << " Visiting Commutative";
      if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {
      MultiNodeActive = false;
      auto FinalOrder = reorderMultiNodeOps();

      MultiNodeOps.clear();
      for (auto &Ops : FinalOrder) {
        VPInstruction *NewOp = buildGraph(Ops.second);
        Ops.first->replaceAllUsesWith(NewOp);
        for (unsigned i = 0; i < CombinedOperands.size(); i++)
          if (CombinedOperands[i] == Ops.first)
            CombinedOperands[i] = NewOp;
    if (ValuesOpcode == Instruction::Load)
      for (VPValue *V : Values)
        CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0));
  switch (ValuesOpcode) {
  case Instruction::Load:
    Opcode = VPInstruction::SLPLoad;
    break;
  case Instruction::Store:
    Opcode = VPInstruction::SLPStore;
    break;
  default:
    Opcode = ValuesOpcode;
  }
  assert(CombinedOperands.size() > 0 && "Need more some operands");
  auto *Inst = cast<VPInstruction>(Values[0])->getUnderlyingInstr();
  auto *VPI = new VPInstruction(Opcode, CombinedOperands, Inst->getDebugLoc());

  LLVM_DEBUG(dbgs() << "Create VPInstruction " << *VPI << " "
                    << *cast<VPInstruction>(Values[0]) << "\n");
  addCombined(Values, VPI);
  return VPI;
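
// Usage sketch (not part of this file; Plan, IAI, Body, Store1 and Store2 are
// placeholder names): buildGraph is seeded with a bundle of store roots,
// assuming VPlanSlp is constructed from the interleaved-access info and the
// VPBasicBlock containing the seeds:
//   VPInterleavedAccessInfo VPIAI(Plan, IAI);
//   VPlanSlp Slp(VPIAI, *Body);
//   SmallVector<VPValue *, 4> Seeds = {Store1, Store2};
//   VPInstruction *Combined = Slp.buildGraph(Seeds);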