22#define DEBUG_TYPE "vplan"
26 if (
const auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(
27 &LoopRegion->getEntryBasicBlock()->front())) {
37 CanonicalIVTy = TC->getLiveInIRValue()->getType();
40 CanonicalIVTy = cast<VPExpandSCEVRecipe>(TC)->getSCEV()->getType();
45 for (
unsigned I = 1, E = R->getNumIncomingValues();
I != E; ++
I) {
46 VPValue *Inc = R->getIncomingValue(
I);
48 "different types inferred for different incoming values");
49 CachedTypes[Inc] = ResTy;
57 auto SetResultTyFromOp = [
this,
R]() {
59 for (
unsigned Op = 1;
Op !=
R->getNumOperands(); ++
Op) {
62 "different types inferred for different operands");
63 CachedTypes[OtherV] = ResTy;
68 unsigned Opcode =
R->getOpcode();
70 return SetResultTyFromOp();
73 case Instruction::ExtractElement:
74 case Instruction::Freeze:
78 case Instruction::Select: {
82 "different types inferred for different operands");
83 CachedTypes[OtherV] = ResTy;
86 case Instruction::ICmp:
87 case Instruction::FCmp:
91 "different types inferred for different operands");
101 case Instruction::PHI:
112 return SetResultTyFromOp();
120 if (
auto *VecTy = dyn_cast<VectorType>(
BaseTy))
121 return VecTy->getElementType();
127 "LogicalAnd operands should be bool");
142 dbgs() <<
"LV: Found unhandled opcode for: ";
143 R->getVPSingleValue()->dump();
149 unsigned Opcode =
R->getOpcode();
154 "types for both operands must match for binary op");
155 CachedTypes[
R->getOperand(1)] = ResTy;
160 case Instruction::ICmp:
161 case Instruction::FCmp:
163 case Instruction::FNeg:
164 case Instruction::Freeze:
166 case Instruction::ExtractValue: {
167 assert(
R->getNumOperands() == 2 &&
"expected single level extractvalue");
169 auto *CI = cast<ConstantInt>(
R->getOperand(1)->getLiveInIRValue());
170 return StructTy->getTypeAtIndex(CI->getZExtValue());
178 dbgs() <<
"LV: Found unhandled opcode for: ";
179 R->getVPSingleValue()->dump();
185 auto &CI = *cast<CallInst>(
R->getUnderlyingInstr());
190 assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
191 "Store recipes should not define any values");
192 return cast<LoadInst>(&
R->getIngredient())->getType();
199 "different types inferred for different operands");
200 CachedTypes[OtherV] = ResTy;
205 unsigned Opcode =
R->getUnderlyingInstr()->getOpcode();
211 "inferred types for operands of binary op don't match");
212 CachedTypes[
R->getOperand(1)] = ResTy;
217 return R->getUnderlyingInstr()->getType();
220 case Instruction::Call: {
221 unsigned CallIdx =
R->getNumOperands() - (
R->isPredicated() ? 2 : 1);
222 return cast<Function>(
R->getOperand(CallIdx)->getLiveInIRValue())
225 case Instruction::Select: {
228 "inferred types for operands of select op don't match");
229 CachedTypes[
R->getOperand(2)] = ResTy;
232 case Instruction::ICmp:
233 case Instruction::FCmp:
235 case Instruction::Alloca:
236 case Instruction::ExtractValue:
237 return R->getUnderlyingInstr()->getType();
238 case Instruction::Freeze:
239 case Instruction::FNeg:
240 case Instruction::GetElementPtr:
242 case Instruction::Load:
243 return cast<LoadInst>(
R->getUnderlyingInstr())->getType();
244 case Instruction::Store:
254 dbgs() <<
"LV: Found unhandled opcode for: ";
255 R->getVPSingleValue()->dump();
261 if (
Type *CachedTy = CachedTypes.lookup(V))
265 if (
auto *IRValue = V->getLiveInIRValue())
266 return IRValue->getType();
269 return CanonicalIVTy;
277 [
this](
const auto *R) {
284 .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
285 [](
const auto *R) {
return R->getScalarType(); })
295 [](
const auto *R) {
return R->getResultType(); })
298 [
this](
const auto *R) {
return inferScalarTypeForRecipe(R); })
301 return V->getUnderlyingValue()->getType();
304 return R->getSCEV()->getType();
306 .Case<VPReductionRecipe>([
this](
const auto *R) {
309 .Case<VPExpressionRecipe>([
this](
const auto *R) {
313 assert(ResultTy &&
"could not infer type for the given VPValue");
314 CachedTypes[V] = ResultTy;
322 for (
VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
325 auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
326 if (!RepR || !
match(RepR->getUnderlyingInstr(),
327 PatternMatch::m_Intrinsic<Intrinsic::assume>()))
337 while (!Worklist.
empty()) {
340 auto *OpR =
Op->getDefiningRecipe();
341 if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.
contains(OpR))
344 auto *UR = dyn_cast<VPRecipeBase>(U);
345 return !UR || !EphRecipes.contains(UR);
354template void DomTreeBuilder::Calculate<DominatorTreeBase<VPBlockBase, false>>(
363 for (
auto &R : *
A->getParent()) {
373 if (ParentA == ParentB)
374 return LocalComesBefore(
A,
B);
378 auto *
Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
381 Region->getNumPredecessors() == 1 &&
"Expected SESE region!");
382 assert(R->getParent()->size() == 1 &&
383 "A recipe in an original replicator region must be the only "
384 "recipe in its block");
390 "No replicate regions expected at this point");
392 "No replicate regions expected at this point");
400 if (
auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
401 return RR->getVFScaleFactor();
402 if (
auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
403 return RR->getVFScaleFactor();
405 (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->
getOpcode() !=
407 "getting scaling factor of reduction-start-vector not implemented yet");
412 unsigned OverrideMaxNumRegs)
const {
414 return LU.second > (OverrideMaxNumRegs > 0
448 for (
VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
449 if (!VPBB->getParent())
455 for (
VPValue *U : R.operands()) {
456 auto *DefR = U->getDefiningRecipe();
462 if (!DefR && (!U->getLiveInIRValue() ||
463 !isa<Instruction>(U->getLiveInIRValue())))
473 EndPoint[DefR] = Idx2Recipe.
size();
481 if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
482 EndPoint[&R] = Idx2Recipe.
size();
502 LLVM_DEBUG(
dbgs() <<
"LV(REG): Calculating max register usage:\n");
506 const auto &TTICapture =
TTI;
510 !TTICapture.isElementTypeLegalForScalableVector(Ty)))
519 for (
unsigned int Idx = 0, Sz = Idx2Recipe.
size();
Idx < Sz; ++
Idx) {
523 RecipeList &
List = TransposeEnds[
Idx];
529 if (!Ends.
count(R) && !R->mayHaveSideEffects())
535 if (isa<VPSingleDefRecipe>(R) &&
537 cast<VPSingleDefRecipe>(R)->getUnderlyingValue()))
541 for (
unsigned J = 0, E = VFs.
size(); J < E; ++J) {
549 for (
auto *R : OpenIntervals) {
557 if (VFs[J].isScalar() ||
560 (isa<VPInstruction>(R) &&
562 (isa<VPReductionPHIRecipe>(R) &&
563 (cast<VPReductionPHIRecipe>(R))->isInLoop())) {
573 ElementCount VF = VFs[J].divideCoefficientBy(ScaleFactor);
575 dbgs() <<
"LV(REG): Scaled down VF from " << VFs[J] <<
" to " << VF
576 <<
" for " << *R <<
"\n";
579 for (
VPValue *DefV : R->definedValues()) {
582 RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
588 auto &Entry = MaxUsages[J][Pair.first];
589 Entry = std::max(Entry, Pair.second);
594 << OpenIntervals.
size() <<
'\n');
611 for (
auto *In : LoopInvariants) {
623 dbgs() <<
"LV(REG): VF = " << VFs[
Idx] <<
'\n';
624 dbgs() <<
"LV(REG): Found max usage: " << MaxUsages[
Idx].
size()
626 for (
const auto &pair : MaxUsages[
Idx]) {
627 dbgs() <<
"LV(REG): RegisterClass: "
631 dbgs() <<
"LV(REG): Found invariant usage: " << Invariant.
size()
633 for (
const auto &pair : Invariant) {
634 dbgs() <<
"LV(REG): RegisterClass: "
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
ReachingDefAnalysis InstSet & ToRemove
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::pair< uint64_t, uint64_t > Interval
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
static unsigned getVFScaleFactor(VPRecipeBase *R)
Get the VF scaling factor applied to the recipe's output, if the recipe has one.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
This file contains the declarations of the Vectorization Plan base classes:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
This class represents an Operation in the Expression.
Implements a dense probed hash-table based set.
Core dominator tree base class.
bool properlyDominates(const DomTreeNodeBase< VPBlockBase > *A, const DomTreeNodeBase< VPBlockBase > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getFixed(ScalarTy MinVal)
bool isBitwiseLogicOp() const
Return true if this is and/or/xor.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isTokenTy() const
Return true if this is 'token'.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
const VPBasicBlock * getEntryBasicBlock() const
A recipe for generating conditional branches on the bits of a mask.
Canonical scalar induction phi of the vector loop.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
Recipe to expand a SCEV expression.
A specialization of VPInstruction augmenting it with a dedicated result type, to be used when the opc...
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ComputeAnyOfResult
Compute the final result of a AnyOf reduction with select(cmp(),x,y), where one of (x,...
@ ExtractPenultimateElement
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
@ FirstOrderRecurrenceSplice
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ CalculateTripCountMinusVF
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
A recipe for forming partial reductions.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
A recipe for handling reduction phis.
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
const VPBlockBase * getExiting() const
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
An analysis for type-inference for VPValues.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
VPTypeAnalysis(const VPlan &Plan)
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A recipe to compute the pointers for widened memory accesses of IndexTy.
A recipe for widening Call instructions using library calls.
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
A recipe for handling GEP instructions.
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
VPValue & getVectorTripCount()
The vector trip count.
VPValue * getTripCount() const
The trip count of the original loop.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool match(Val *V, const Pattern &P)
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
This is an optimization pass for GlobalISel generic memory operations.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
SmallVector< VPRegisterUsage, 8 > calculateRegisterUsageForPlan(VPlan &Plan, ArrayRef< ElementCount > VFs, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Estimate the register usage for Plan and vectorization factors in VFs by calculating the highest numb...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void collectEphemeralRecipesForVPlan(VPlan &Plan, DenseSet< VPRecipeBase * > &EphRecipes)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
DWARFExpression::Operation Op
A MapVector that performs no allocations if smaller than a certain size.
A recipe for handling first-order recurrence phis.
A struct that represents some properties of the register usage of a loop.
SmallMapVector< unsigned, unsigned, 4 > MaxLocalUsers
Holds the maximum number of concurrent live intervals in the loop.
bool exceedsMaxNumRegs(const TargetTransformInfo &TTI, unsigned OverrideMaxNumRegs=0) const
Check if any of the tracked live intervals exceeds the number of available registers for the target.
SmallMapVector< unsigned, unsigned, 4 > LoopInvariantRegs
Holds the number of loop invariant values that are used in the loop.
A recipe for widening select instructions.