40#define DEBUG_TYPE "vplan-slp"
51 visitBlock(
Base, Old2New, IAI);
55void VPInterleavedAccessInfo::visitBlock(
VPBlockBase *
Block, Old2NewTy &Old2New,
59 if (isa<VPWidenPHIRecipe>(&VPI))
61 auto *VPInst = dyn_cast<VPInstruction>(&VPI);
64 auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
71 auto NewIGIter = Old2New.find(IG);
72 if (NewIGIter == Old2New.end())
74 IG->getFactor(), IG->isReverse(), IG->getAlign());
76 if (Inst == IG->getInsertPos())
77 Old2New[IG]->setInsertPos(VPInst);
79 InterleaveGroupMap[VPInst] = Old2New[IG];
80 InterleaveGroupMap[VPInst]->insertMember(
81 VPInst, IG->getIndex(Inst),
82 Align(IG->isReverse() ? (-1) *
int(IG->getFactor())
86 visitRegion(
Region, Old2New, IAI);
101 CompletelySLP =
false;
107 return cast<VPInstruction>(V)->getUnderlyingInstr();
109 unsigned BundleSize = 0;
111 Type *
T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
112 assert(!
T->isVectorTy() &&
"Only scalar types supported for now");
113 BundleSize +=
T->getScalarSizeInBits();
115 WidestBundleBits = std::max(WidestBundleBits, BundleSize);
118 auto Res = BundleToCombined.try_emplace(to_vector<4>(
Operands), New);
120 "Already created a combined instruction for the operand bundle");
127 return Op && isa<VPInstruction>(
Op) &&
128 cast<VPInstruction>(
Op)->getUnderlyingInstr();
130 LLVM_DEBUG(
dbgs() <<
"VPSLP: not all operands are VPInstructions\n");
139 cast<VPInstruction>(
Operands[0])->getUnderlyingInstr();
140 unsigned Opcode = OriginalInstr->
getOpcode();
143 const Instruction *
I = cast<VPInstruction>(
Op)->getUnderlyingInstr();
144 return I->getOpcode() == Opcode &&
145 I->getType()->getPrimitiveSizeInBits() == Width;
153 return cast<VPInstruction>(
Op)->getParent() != &this->BB;
160 [](
VPValue *
Op) {
return Op->hasMoreThanOneUniqueUser(); })) {
161 LLVM_DEBUG(
dbgs() <<
"VPSLP: Some operands have multiple users.\n");
169 if (Opcode == Instruction::Load) {
170 unsigned LoadsSeen = 0;
172 for (
auto &
I : *Parent) {
173 auto *VPI = dyn_cast<VPInstruction>(&
I);
176 if (VPI->getOpcode() == Instruction::Load &&
182 if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
184 dbgs() <<
"VPSLP: instruction modifying memory between loads\n");
190 return cast<LoadInst>(cast<VPInstruction>(
Op)->getUnderlyingInstr())
193 LLVM_DEBUG(
dbgs() <<
"VPSLP: only simple loads are supported.\n");
198 if (Opcode == Instruction::Store)
200 return cast<StoreInst>(cast<VPInstruction>(
Op)->getUnderlyingInstr())
203 LLVM_DEBUG(
dbgs() <<
"VPSLP: only simple stores are supported.\n");
211 unsigned OperandIndex) {
215 auto *U = cast<VPInstruction>(V);
216 Operands.push_back(U->getOperand(OperandIndex));
223 cast<VPInstruction>(Values[0])->
getOpcode());
229 auto *VPI = cast<VPInstruction>(Values[0]);
231 switch (VPI->getOpcode()) {
232 case Instruction::Load:
234 case Instruction::Store:
238 for (
unsigned I = 0, NumOps = VPI->getNumOperands();
I < NumOps; ++
I)
248 unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();
250 return cast<VPInstruction>(V)->getOpcode() != Opcode;
260 if (
A->getOpcode() !=
B->getOpcode())
263 if (
A->getOpcode() != Instruction::Load &&
264 A->getOpcode() != Instruction::Store)
269 return GA && GB && GA == GB && GA->getIndex(
A) + 1 == GB->getIndex(
B);
276 auto *I1 = dyn_cast<VPInstruction>(V1);
277 auto *I2 = dyn_cast<VPInstruction>(V2);
286 for (
unsigned I = 0, EV1 = I1->getNumOperands();
I < EV1; ++
I)
287 for (
unsigned J = 0, EV2 = I2->getNumOperands(); J < EV2; ++J)
289 getLAScore(I1->getOperand(
I), I2->getOperand(J), MaxLevel - 1, IAI);
293std::pair<VPlanSlp::OpMode, VPValue *>
297 assert((Mode == OpMode::Load || Mode == OpMode::Opcode) &&
298 "Currently we only handle load and commutative opcodes");
303 << *cast<VPInstruction>(
Last)->getUnderlyingInstr() <<
" ");
304 for (
auto *Candidate : Candidates) {
305 auto *LastI = cast<VPInstruction>(
Last);
306 auto *CandidateI = cast<VPInstruction>(Candidate);
308 LLVM_DEBUG(
dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
315 if (BestCandidates.
empty())
316 return {OpMode::Failed,
nullptr};
318 if (BestCandidates.
size() == 1)
319 return {
Mode, BestCandidates[0]};
322 unsigned BestScore = 0;
324 unsigned PrevScore = ~0
u;
328 for (
auto *Candidate : BestCandidates) {
330 if (PrevScore == ~0u)
332 if (PrevScore != Score)
336 if (Score > BestScore) {
345 << *cast<VPInstruction>(Best)->getUnderlyingInstr()
347 Candidates.erase(Best);
360 for (
auto &
Operands : MultiNodeOps) {
362 if (cast<VPInstruction>(
Operands.second[0])->getOpcode() ==
364 Mode.push_back(OpMode::Load);
366 Mode.push_back(OpMode::Opcode);
369 for (
unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {
370 LLVM_DEBUG(
dbgs() <<
" Finding best value for lane " << Lane <<
"\n");
373 for (
auto Ops : MultiNodeOps) {
375 dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
377 Candidates.
insert(Ops.second[Lane]);
381 for (
unsigned Op = 0, E = MultiNodeOps.size();
Op < E; ++
Op) {
383 if (Mode[
Op] == OpMode::Failed)
387 std::pair<OpMode, VPValue *> Res =
388 getBest(Mode[
Op],
Last, Candidates, IAI);
400#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
403 for (
auto *
Op : Values) {
404 if (
auto *VPInstr = cast_or_null<VPInstruction>(
Op))
405 if (
auto *Instr = VPInstr->getUnderlyingInstr()) {
409 dbgs() <<
" nullptr | ";
419 auto I = BundleToCombined.find(to_vector<4>(Values));
420 if (
I != BundleToCombined.end()) {
425 for (
auto *V : Values) {
426 auto UI = V->user_begin();
427 auto *FirstUser = *UI++;
428 while (UI != V->user_end()) {
429 assert(*UI == FirstUser &&
"Currently we only support SLP trees.");
439 dbgs() <<
"buildGraph: ";
443 if (!areVectorizable(Values))
447 unsigned ValuesOpcode = *
getOpcode(Values);
451 bool MultiNodeRoot = !MultiNodeActive;
452 MultiNodeActive =
true;
455 dbgs() <<
" Visiting Commutative";
460 if (OperandsOpcode && OperandsOpcode ==
getOpcode(Values)) {
475 MultiNodeActive =
false;
477 auto FinalOrder = reorderMultiNodeOps();
479 MultiNodeOps.clear();
480 for (
auto &Ops : FinalOrder) {
482 Ops.first->replaceAllUsesWith(NewOp);
483 for (
unsigned i = 0; i < CombinedOperands.
size(); i++)
484 if (CombinedOperands[i] == Ops.first)
485 CombinedOperands[i] = NewOp;
493 if (ValuesOpcode == Instruction::Load)
495 CombinedOperands.
push_back(cast<VPInstruction>(V)->getOperand(0));
502 switch (ValuesOpcode) {
503 case Instruction::Load:
506 case Instruction::Store:
510 Opcode = ValuesOpcode;
517 assert(CombinedOperands.
size() > 0 &&
"Need more some operands");
518 auto *Inst = cast<VPInstruction>(Values[0])->getUnderlyingInstr();
519 auto *VPI =
new VPInstruction(Opcode, CombinedOperands, Inst->getDebugLoc());
521 LLVM_DEBUG(
dbgs() <<
"Create VPInstruction " << *VPI <<
" " << Values[0]
523 addCombined(Values, VPI);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file defines the DenseMap class.
mir Rename Register Operands
This file defines the SmallVector class.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static unsigned LookaheadMaxDepth
static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B, VPInterleavedAccessInfo &IAI)
Returns true if A and B access sequential memory if they are loads or stores or if they have identica...
static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel, VPInterleavedAccessInfo &IAI)
Implements getLAScore from Listing 7 in the paper.
static bool areCommutative(ArrayRef< VPValue * > Values)
static SmallVector< VPValue *, 4 > getOperands(ArrayRef< VPValue * > Values, unsigned OperandIndex)
This file contains the declarations for VPlan-based SLP.
This file contains the declarations of the entities induced by Vectorization Plans,...
This file contains the declarations of the Vectorization Plan base classes:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
This class represents an Operation in the Expression.
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
The group of interleaved loads/stores sharing the same stride and close to each other.
Drive the analysis of interleaved memory accesses in the loop.
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
VPRegionBlock * getParent()
This is a concrete Recipe that models a single VPlan-level instruction.
LLVM_ABI_FOR_TEST VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI)
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
LLVM_ABI_FOR_TEST VPInstruction * buildGraph(ArrayRef< VPValue * > Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands,...
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Type * getType() const
All values are typed, get the type of this value.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
NodeAddr< InstrNode * > Instr
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
DWARFExpression::Operation Op
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
This struct is a compact representation of a valid (non-zero power of two) alignment.