40#define DEBUG_TYPE "vplan-slp"
51 visitBlock(
Base, Old2New, IAI);
55void VPInterleavedAccessInfo::visitBlock(
VPBlockBase *
Block, Old2NewTy &Old2New,
58 for (VPRecipeBase &VPI : *VPBB) {
71 auto NewIGIter = Old2New.find(IG);
72 if (NewIGIter == Old2New.end())
73 Old2New[IG] =
new InterleaveGroup<VPInstruction>(
74 IG->getFactor(), IG->isReverse(), IG->getAlign());
76 if (Inst == IG->getInsertPos())
77 Old2New[IG]->setInsertPos(VPInst);
79 InterleaveGroupMap[VPInst] = Old2New[IG];
80 InterleaveGroupMap[VPInst]->insertMember(
81 VPInst, IG->getIndex(Inst),
82 Align(IG->isReverse() ? (-1) *
int(IG->getFactor())
86 visitRegion(Region, Old2New, IAI);
101 CompletelySLP =
false;
109 unsigned BundleSize = 0;
112 assert(!
T->isVectorTy() &&
"Only scalar types supported for now");
113 BundleSize +=
T->getScalarSizeInBits();
115 WidestBundleBits = std::max(WidestBundleBits, BundleSize);
120 "Already created a combined instruction for the operand bundle");
130 LLVM_DEBUG(
dbgs() <<
"VPSLP: not all operands are VPInstructions\n");
140 unsigned Opcode = OriginalInstr->
getOpcode();
144 return I->getOpcode() == Opcode &&
145 I->getType()->getPrimitiveSizeInBits() == Width;
160 [](VPValue *
Op) {
return Op->hasMoreThanOneUniqueUser(); })) {
161 LLVM_DEBUG(
dbgs() <<
"VPSLP: Some operands have multiple users.\n");
169 if (Opcode == Instruction::Load) {
170 unsigned LoadsSeen = 0;
172 for (
auto &
I : *Parent) {
176 if (VPI->getOpcode() == Instruction::Load &&
182 if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
184 dbgs() <<
"VPSLP: instruction modifying memory between loads\n");
193 LLVM_DEBUG(
dbgs() <<
"VPSLP: only simple loads are supported.\n");
198 if (Opcode == Instruction::Store)
203 LLVM_DEBUG(
dbgs() <<
"VPSLP: only simple stores are supported.\n");
211 unsigned OperandIndex) {
216 Operands.push_back(U->getOperand(OperandIndex));
231 switch (VPI->getOpcode()) {
232 case Instruction::Load:
234 case Instruction::Store:
260 if (
A->getOpcode() !=
B->getOpcode())
263 if (
A->getOpcode() != Instruction::Load &&
264 A->getOpcode() != Instruction::Store)
269 return GA && GB && GA == GB && GA->getIndex(
A) + 1 == GB->getIndex(
B);
286 for (
unsigned I = 0, EV1 = I1->getNumOperands();
I < EV1; ++
I)
287 for (
unsigned J = 0, EV2 = I2->getNumOperands(); J < EV2; ++J)
289 getLAScore(I1->getOperand(
I), I2->getOperand(J), MaxLevel - 1, IAI);
293std::pair<VPlanSlp::OpMode, VPValue *>
298 "Currently we only handle load and commutative opcodes");
304 for (
auto *Candidate : Candidates) {
315 if (BestCandidates.
empty())
316 return {OpMode::Failed,
nullptr};
318 if (BestCandidates.
size() == 1)
319 return {
Mode, BestCandidates[0]};
321 VPValue *Best =
nullptr;
322 unsigned BestScore = 0;
324 unsigned PrevScore = ~0
u;
328 for (
auto *Candidate : BestCandidates) {
330 if (PrevScore == ~0u)
332 if (PrevScore != Score)
336 if (Score > BestScore) {
347 Candidates.erase(Best);
353 SmallVector<MultiNodeOpTy, 4> FinalOrder;
355 FinalOrder.
reserve(MultiNodeOps.size());
356 Mode.reserve(MultiNodeOps.size());
360 for (
auto &
Operands : MultiNodeOps) {
364 Mode.push_back(OpMode::Load);
366 Mode.push_back(OpMode::Opcode);
369 for (
unsigned Lane = 1,
E = MultiNodeOps[0].second.size(); Lane <
E; ++Lane) {
370 LLVM_DEBUG(
dbgs() <<
" Finding best value for lane " << Lane <<
"\n");
371 SmallPtrSet<VPValue *, 4> Candidates;
373 for (
auto Ops : MultiNodeOps) {
381 for (
unsigned Op = 0,
E = MultiNodeOps.size();
Op <
E; ++
Op) {
383 if (
Mode[
Op] == OpMode::Failed)
386 VPValue *
Last = FinalOrder[
Op].second[Lane - 1];
387 std::pair<OpMode, VPValue *> Res =
400#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
403 for (
auto *
Op : Values) {
405 if (
auto *Instr = VPInstr->getUnderlyingInstr()) {
409 dbgs() <<
" nullptr | ";
420 if (
I != BundleToCombined.end()) {
425 for (
auto *V : Values) {
426 auto UI = V->user_begin();
427 auto *FirstUser = *UI++;
428 while (UI != V->user_end()) {
429 assert(*UI == FirstUser &&
"Currently we only support SLP trees.");
439 dbgs() <<
"buildGraph: ";
443 if (!areVectorizable(Values))
447 unsigned ValuesOpcode = *
getOpcode(Values);
451 bool MultiNodeRoot = !MultiNodeActive;
452 MultiNodeActive =
true;
455 dbgs() <<
" Visiting Commutative";
460 if (OperandsOpcode && OperandsOpcode ==
getOpcode(Values)) {
475 MultiNodeActive =
false;
477 auto FinalOrder = reorderMultiNodeOps();
479 MultiNodeOps.clear();
480 for (
auto &
Ops : FinalOrder) {
482 Ops.first->replaceAllUsesWith(NewOp);
483 for (
unsigned i = 0; i < CombinedOperands.
size(); i++)
484 if (CombinedOperands[i] ==
Ops.first)
485 CombinedOperands[i] = NewOp;
493 if (ValuesOpcode == Instruction::Load)
502 switch (ValuesOpcode) {
503 case Instruction::Load:
506 case Instruction::Store:
510 Opcode = ValuesOpcode;
517 assert(CombinedOperands.
size() > 0 &&
"Need more some operands");
519 auto *VPI =
new VPInstruction(Opcode, CombinedOperands, Inst->getDebugLoc());
521 LLVM_DEBUG(
dbgs() <<
"Create VPInstruction " << *VPI <<
" " << Values[0]
523 addCombined(Values, VPI);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the DenseMap class.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
mir Rename Register Operands
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file defines the SmallVector class.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static unsigned LookaheadMaxDepth
static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B, VPInterleavedAccessInfo &IAI)
Returns true if A and B access sequential memory if they are loads or stores or if they have identica...
static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel, VPInterleavedAccessInfo &IAI)
Implements getLAScore from Listing 7 in the paper.
static bool areCommutative(ArrayRef< VPValue * > Values)
static SmallVector< VPValue *, 4 > getOperands(ArrayRef< VPValue * > Values, unsigned OperandIndex)
This file contains the declarations for VPlan-based SLP.
This file contains the declarations of the entities induced by Vectorization Plans,...
This file contains the declarations of the Vectorization Plan base classes:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Drive the analysis of interleaved memory accesses in the loop.
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
This is a concrete Recipe that models a single VPlan-level instruction.
LLVM_ABI_FOR_TEST VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI)
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
LLVM_ABI_FOR_TEST VPInstruction * buildGraph(ArrayRef< VPValue * > Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands,...
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Type * getType() const
All values are typed, get the type of this value.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
NodeAddr< InstrNode * > Instr
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto cast_or_null(const Y &Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.