LLVM 22.0.0git
SLPVectorizer.cpp File Reference
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <tuple>
#include <utility>

Go to the source code of this file.

Classes

class  llvm::slpvectorizer::BoUpSLP
 Bottom Up SLP Vectorizer. More...
struct  llvm::slpvectorizer::BoUpSLP::EdgeInfo
 This structure holds any data we need about the edges being traversed during buildTreeRec(). More...
class  llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics
 A helper class used for scoring candidates for two consecutive lanes. More...
class  llvm::slpvectorizer::BoUpSLP::VLOperands
 A helper data structure to hold the operands of a vector of instructions. More...
struct  llvm::DenseMapInfo< BoUpSLP::EdgeInfo >
struct  llvm::GraphTraits< BoUpSLP * >
struct  llvm::GraphTraits< BoUpSLP * >::ChildIteratorType
 Add the VectorizableTree to the index iterator to be able to return TreeEntry pointers. More...
class  llvm::GraphTraits< BoUpSLP * >::nodes_iterator
 For the node iterator we just need to turn the TreeEntry iterator into a TreeEntry* iterator so that it dereferences to NodeRef. More...
struct  llvm::DOTGraphTraits< BoUpSLP * >
class  llvm::slpvectorizer::BoUpSLP::ShuffleCostEstimator
 Merges shuffle masks and emits final shuffle instruction, if required. More...
class  llvm::slpvectorizer::BoUpSLP::ShuffleInstructionBuilder
 Merges shuffle masks and emits final shuffle instruction, if required. More...

Namespaces

namespace  llvm
 This is an optimization pass for GlobalISel generic memory operations.
namespace  llvm::slpvectorizer
 A private "module" namespace for types and utilities used by this pass.

Macros

#define SV_NAME   "slp-vectorizer"
#define DEBUG_TYPE   "SLP"

Functions

 STATISTIC (NumVectorInstructions, "Number of vector instructions generated")
 DEBUG_COUNTER (VectorizedGraphs, "slp-vectorized", "Controls which SLP graphs should be vectorized.")
static bool isValidElementType (Type *Ty)
 Predicate for the element types that the SLP vectorizer supports.
static Type * getValueType (Value *V)
 Returns the type of the given value/instruction V.
static unsigned getNumElements (Type *Ty)
static FixedVectorType * getWidenedType (Type *ScalarTy, unsigned VF)
static unsigned getFullVectorNumberOfElements (const TargetTransformInfo &TTI, Type *Ty, unsigned Sz)
 Returns the number of elements of the given type Ty, not less than Sz, which forms type, which splits by TTI into whole vector types during legalization.
static unsigned getFloorFullVectorNumberOfElements (const TargetTransformInfo &TTI, Type *Ty, unsigned Sz)
 Returns the number of elements of the given type Ty, not greater than Sz, which forms type, which splits by TTI into whole vector types during legalization.
static void transformScalarShuffleIndiciesToVector (unsigned VecTyNumElements, SmallVectorImpl< int > &Mask)
static unsigned getShufflevectorNumGroups (ArrayRef< Value * > VL)
static SmallVector< int > calculateShufflevectorMask (ArrayRef< Value * > VL)
static bool isConstant (Value *V)
static bool isVectorLikeInstWithConstOps (Value *V)
 Checks if V is one of vector-like instructions, i.e.
static unsigned getPartNumElems (unsigned Size, unsigned NumParts)
 Returns power-of-2 number of elements in a single register (part), given the total number of elements Size and number of registers (parts) NumParts.
static unsigned getNumElems (unsigned Size, unsigned PartNumElems, unsigned Part)
 Returns correct remaining number of elements, considering total amount Size, (power-of-2 number) of elements in a single register PartNumElems and current register (part) Part.
static std::string shortBundleName (ArrayRef< Value * > VL, int Idx=-1)
 Print a short descriptor of the instruction bundle suitable for debug output.
static bool allSameBlock (ArrayRef< Value * > VL)
static bool allConstant (ArrayRef< Value * > VL)
static bool isSplat (ArrayRef< Value * > VL)
static bool isCommutative (Instruction *I, Value *ValWithUses)
static bool isCommutative (Instruction *I)
 This is a helper function to check whether I is commutative.
static unsigned getNumberOfPotentiallyCommutativeOps (Instruction *I)
template<typename T>
static std::optional< unsigned > getInsertExtractIndex (const Value *Inst, unsigned Offset)
static std::optional< unsigned > getElementIndex (const Value *Inst, unsigned Offset=0)
static bool allSameOpcode (ArrayRef< Value * > VL)
static SmallBitVector buildUseMask (int VF, ArrayRef< int > Mask, UseMask MaskArg)
 Prepares a use bitset for the given mask either for the first argument or for the second.
template<bool IsPoisonOnly = false>
static SmallBitVector isUndefVector (const Value *V, const SmallBitVector &UseMask={})
 Checks if the given value is actually an undefined constant vector.
static std::optional< TargetTransformInfo::ShuffleKind > isFixedVectorShuffle (ArrayRef< Value * > VL, SmallVectorImpl< int > &Mask, AssumptionCache *AC)
 Checks if the vector of instructions can be represented as a shuffle, like:
   x0 = extractelement <4 x i8> x, i32 0
   x3 = extractelement <4 x i8> x, i32 3
   y1 = extractelement <4 x i8> y, i32 1
   y2 = extractelement <4 x i8> y, i32 2
   x0x0 = mul i8 x0, x0
   x3x3 = mul i8 x3, x3
   y1y1 = mul i8 y1, y1
   y2y2 = mul i8 y2, y2
   ins1 = insertelement <4 x i8> poison, i8 x0x0, i32 0
   ins2 = insertelement <4 x i8> ins1, i8 x3x3, i32 1
   ins3 = insertelement <4 x i8> ins2, i8 y1y1, i32 2
   ins4 = insertelement <4 x i8> ins3, i8 y2y2, i32 3
   ret <4 x i8> ins4
 can be transformed into:
   %1 = shufflevector <4 x i8> x, <4 x i8> y, <4 x i32> <i32 0, i32 3, i32 5, i32 6>
   %2 = mul <4 x i8> %1, %1
   ret <4 x i8> %2
 Mask will return the Shuffle Mask equivalent to the extracted elements.
static std::optional< unsigned > getExtractIndex (const Instruction *E)
static bool llvm::areAllOperandsNonInsts (Value *V)
 Checks if the provided value does not require scheduling.
static bool llvm::isUsedOutsideBlock (Value *V)
 Checks if the provided value does not require scheduling.
static bool llvm::doesNotNeedToBeScheduled (Value *V)
 Checks if the specified value does not require scheduling.
static InstructionsState getSameOpcode (ArrayRef< Value * > VL, const TargetLibraryInfo &TLI)
static Instruction * findInstructionWithOpcode (ArrayRef< Value * > VL, unsigned Opcode)
 Find an instruction with a specific opcode in VL.
static bool areCompatibleCmpOps (Value *BaseOp0, Value *BaseOp1, Value *Op0, Value *Op1, const TargetLibraryInfo &TLI)
 Checks if the provided operands of 2 cmp instructions are compatible, i.e.
static bool isCmpSameOrSwapped (const CmpInst *BaseCI, const CmpInst *CI, const TargetLibraryInfo &TLI)
static bool allSameType (ArrayRef< Value * > VL)
static bool doesInTreeUserNeedToExtract (Value *Scalar, Instruction *UserInst, TargetLibraryInfo *TLI, const TargetTransformInfo *TTI)
static MemoryLocation getLocation (Instruction *I)
static bool isSimple (Instruction *I)
static void addMask (SmallVectorImpl< int > &Mask, ArrayRef< int > SubMask, bool ExtendingManyInputs=false)
 Shuffles Mask in accordance with the given SubMask.
static void fixupOrderingIndices (MutableArrayRef< unsigned > Order)
 Order may have elements assigned special value (size) which is out of bounds.
static SmallBitVector getAltInstrMask (ArrayRef< Value * > VL, Type *ScalarTy, unsigned Opcode0, unsigned Opcode1)
static SmallVector< Constant * > replicateMask (ArrayRef< Constant * > Val, unsigned VF)
 Replicates the given Val VF times.
static void llvm::inversePermutation (ArrayRef< unsigned > Indices, SmallVectorImpl< int > &Mask)
static void llvm::reorderScalars (SmallVectorImpl< Value * > &Scalars, ArrayRef< int > Mask)
 Reorders the list of scalars in accordance with the given Mask.
static bool llvm::doesNotNeedToSchedule (ArrayRef< Value * > VL)
 Checks if the specified array of instructions does not require scheduling.
static bool llvm::hasFullVectorsOrPowerOf2 (const TargetTransformInfo &TTI, Type *Ty, unsigned Sz)
 Returns true if widened type of Ty elements with size Sz represents full vector type, i.e.
static unsigned llvm::getNumberOfParts (const TargetTransformInfo &TTI, VectorType *VecTy, const unsigned Limit=std::numeric_limits< unsigned >::max())
 Returns number of parts, the type VecTy will be split at the codegen phase.
static void reorderReuses (SmallVectorImpl< int > &Reuses, ArrayRef< int > Mask)
 Reorders the given Reuses mask according to the given Mask.
static void reorderOrder (SmallVectorImpl< unsigned > &Order, ArrayRef< int > Mask, bool BottomOrder=false)
 Reorders the given Order according to the given Mask.
static bool arePointersCompatible (Value *Ptr1, Value *Ptr2, const TargetLibraryInfo &TLI, bool CompareOpcodes=true)
template<typename T>
static Align computeCommonAlignment (ArrayRef< Value * > VL)
 Calculates minimal alignment as a common alignment.
static bool isReverseOrder (ArrayRef< unsigned > Order)
 Check if Order represents reverse order.
static const SCEV * calculateRtStride (ArrayRef< Value * > PointerOps, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
 Checks if the provided list of pointers Pointers represents the strided pointers for type ElemTy.
static std::pair< InstructionCost, InstructionCost > getGEPCosts (const TargetTransformInfo &TTI, ArrayRef< Value * > Ptrs, Value *BasePtr, unsigned Opcode, TTI::TargetCostKind CostKind, Type *ScalarTy, VectorType *VecTy)
 Calculate the scalar and the vector costs from vectorizing set of GEPs.
static InstructionCost getShuffleCost (const TargetTransformInfo &TTI, TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask={}, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args={})
 Returns the cost of the shuffle instructions with the given Kind, vector type Tp and optional Mask.
static InstructionCost getScalarizationOverhead (const TargetTransformInfo &TTI, Type *ScalarTy, VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={})
 This is similar to TargetTransformInfo::getScalarizationOverhead, but if ScalarTy is a FixedVectorType, a vector will be inserted or extracted instead of a scalar.
static InstructionCost getVectorInstrCost (const TargetTransformInfo &TTI, Type *ScalarTy, unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Scalar, ArrayRef< std::tuple< Value *, User *, int > > ScalarUserAndIdx)
 This is similar to TargetTransformInfo::getVectorInstrCost, but if ScalarTy is a FixedVectorType, a vector will be extracted instead of a scalar.
static InstructionCost getExtractWithExtendCost (const TargetTransformInfo &TTI, unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)
 This is similar to TargetTransformInfo::getExtractWithExtendCost, but if Dst is a FixedVectorType, a vector will be extracted instead of a scalar.
static Value * createInsertVector (IRBuilderBase &Builder, Value *Vec, Value *V, unsigned Index, function_ref< Value *(Value *, Value *, ArrayRef< int >)> Generator={})
 Creates subvector insert.
static Value * createExtractVector (IRBuilderBase &Builder, Value *Vec, unsigned SubVecVF, unsigned Index)
 Generates subvector extract using Generator or using default shuffle.
static bool buildCompressMask (ArrayRef< Value * > PointerOps, ArrayRef< unsigned > Order, Type *ScalarTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< int > &CompressMask)
 Builds compress-like mask for shuffles for the given PointerOps, ordered with Order.
static bool isMaskedLoadCompress (ArrayRef< Value * > VL, ArrayRef< Value * > PointerOps, ArrayRef< unsigned > Order, const TargetTransformInfo &TTI, const DataLayout &DL, ScalarEvolution &SE, AssumptionCache &AC, const DominatorTree &DT, const TargetLibraryInfo &TLI, const function_ref< bool(Value *)> AreAllUsersVectorized, bool &IsMasked, unsigned &InterleaveFactor, SmallVectorImpl< int > &CompressMask, VectorType *&LoadVecTy)
 Checks if the VL can be transformed to a (masked)load + compress or (masked) interleaved load.
static bool isMaskedLoadCompress (ArrayRef< Value * > VL, ArrayRef< Value * > PointerOps, ArrayRef< unsigned > Order, const TargetTransformInfo &TTI, const DataLayout &DL, ScalarEvolution &SE, AssumptionCache &AC, const DominatorTree &DT, const TargetLibraryInfo &TLI, const function_ref< bool(Value *)> AreAllUsersVectorized)
 Checks if the VL can be transformed to a (masked)load + compress or (masked) interleaved load.
static bool clusterSortPtrAccesses (ArrayRef< Value * > VL, ArrayRef< BasicBlock * > BBs, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
static bool areTwoInsertFromSameBuildVector (InsertElementInst *VU, InsertElementInst *V, function_ref< Value *(InsertElementInst *)> GetBaseOperand)
 Check if two insertelement instructions are from the same buildvector.
static bool isAlternateInstruction (Instruction *I, Instruction *MainOp, Instruction *AltOp, const TargetLibraryInfo &TLI)
 Checks if the specified instruction I is an alternate operation for the given MainOp and AltOp instructions.
static bool isRepeatedNonIdentityClusteredMask (ArrayRef< int > Mask, unsigned Sz)
 Checks if the given mask is a "clustered" mask with the same clusters of size Sz, which are not identity submasks.
static void combineOrders (MutableArrayRef< unsigned > Order, ArrayRef< unsigned > SecondaryOrder)
static LLVM_DUMP_METHOD void dumpOrder (const BoUpSLP::OrdersType &Order)
static void gatherPossiblyVectorizableLoads (const BoUpSLP &R, ArrayRef< Value * > VL, const DataLayout &DL, ScalarEvolution &SE, const TargetTransformInfo &TTI, SmallVectorImpl< SmallVector< std::pair< LoadInst *, int64_t > > > &GatheredLoads, bool AddNew=true)
 Tries to find subvector of loads and builds new vector of only loads if can be profitable.
static std::pair< size_t, size_t > generateKeySubkey (Value *V, const TargetLibraryInfo *TLI, function_ref< hash_code(size_t, LoadInst *)> LoadsSubkeyGenerator, bool AllowAlternate)
 Generates key/subkey pair for the given value to provide effective sorting of the values and better detection of the vectorizable values sequences.
static bool isMainInstruction (Instruction *I, Instruction *MainOp, Instruction *AltOp, const TargetLibraryInfo &TLI)
 Checks if the specified instruction I is a main operation for the given MainOp and AltOp instructions.
static SmallVector< Type * > buildIntrinsicArgTypes (const CallInst *CI, const Intrinsic::ID ID, const unsigned VF, unsigned MinBW, const TargetTransformInfo *TTI)
 Builds the arguments types vector for the given call instruction with the given ID for the specified vector factor.
static std::pair< InstructionCost, InstructionCost > getVectorCallCosts (CallInst *CI, FixedVectorType *VecTy, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, ArrayRef< Type * > ArgTys)
 Calculates the costs of vectorized intrinsic (if possible) and vectorized function (if possible) calls.
static std::pair< Instruction *, Instruction * > getMainAltOpsNoStateVL (ArrayRef< Value * > VL)
 Returns main/alternate instructions for the given VL.
static bool tryToFindDuplicates (SmallVectorImpl< Value * > &VL, SmallVectorImpl< int > &ReuseShuffleIndices, const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI, const InstructionsState &S, const BoUpSLP::EdgeInfo &UserTreeIdx, bool TryPad=false)
 Checks that every instruction appears once in the list and if not, packs them, building ReuseShuffleIndices mask and mutating VL.
static InstructionCost canConvertToFMA (ArrayRef< Value * > VL, const InstructionsState &S, DominatorTree &DT, const DataLayout &DL, TargetTransformInfo &TTI, const TargetLibraryInfo &TLI)
 Check if we can convert fadd/fsub sequence to FMAD.
static bool isLoadCombineCandidateImpl (Value *Root, unsigned NumElts, TargetTransformInfo *TTI, bool MustMatchOrInst)
static bool isFirstInsertElement (const InsertElementInst *IE1, const InsertElementInst *IE2)
 Checks if the IE1 instructions is followed by IE2 instruction in the buildvector sequence.
template<typename T>
static T * performExtractsShuffleAction (MutableArrayRef< std::pair< T *, SmallVector< int > > > ShuffleMask, Value *Base, function_ref< unsigned(T *)> GetVF, function_ref< std::pair< T *, bool >(T *, ArrayRef< int >, bool)> ResizeAction, function_ref< T *(ArrayRef< int >, ArrayRef< T * >)> Action)
 Does the analysis of the provided shuffle masks and performs the requested actions on the vectors with the given shuffle masks.
static Instruction * propagateMetadata (Instruction *Inst, ArrayRef< Value * > VL)
static DebugLoc getDebugLocFromPHI (PHINode &PN)
static RecurKind getRdxKind (Value *V)
 Gets recurrence kind from the specified value.
static bool checkTreeSizes (ArrayRef< std::pair< unsigned, unsigned > > Sizes, bool First)
 Checks if the quadratic mean deviation is less than 90% of the mean size.
static std::optional< unsigned > getAggregateSize (Instruction *InsertInst)
static void findBuildAggregateRec (Instruction *LastInsertInst, TargetTransformInfo *TTI, SmallVectorImpl< Value * > &BuildVectorOpds, SmallVectorImpl< Value * > &InsertElts, unsigned OperandOffset, const BoUpSLP &R)
static bool findBuildAggregate (Instruction *LastInsertInst, TargetTransformInfo *TTI, SmallVectorImpl< Value * > &BuildVectorOpds, SmallVectorImpl< Value * > &InsertElts, const BoUpSLP &R)
 Recognize construction of vectors like
   ra = insertelement <4 x float> poison, float s0, i32 0
   rb = insertelement <4 x float> ra, float s1, i32 1
   rc = insertelement <4 x float> rb, float s2, i32 2
   rd = insertelement <4 x float> rc, float s3, i32 3
 starting from the last insertelement or insertvalue instruction.
static Instruction * getReductionInstr (const DominatorTree *DT, PHINode *P, BasicBlock *ParentBB, LoopInfo *LI)
 Try and get a reduction instruction from a phi node.
static bool matchRdxBop (Instruction *I, Value *&V0, Value *&V1)
static Instruction * tryGetSecondaryReductionRoot (PHINode *Phi, Instruction *Root)
 We could have an initial reduction that is not an add.
static Instruction * getNonPhiOperand (Instruction *I, PHINode *Phi)
 Returns the first operand of I that does not match Phi.
static bool isReductionCandidate (Instruction *I)
 Returns true if I is a candidate instruction for reduction vectorization.
template<typename T>
static bool tryToVectorizeSequence (SmallVectorImpl< T * > &Incoming, function_ref< bool(T *, T *)> Comparator, function_ref< bool(ArrayRef< T * >, T *)> AreCompatible, function_ref< bool(ArrayRef< T * >, bool)> TryToVectorizeHelper, bool MaxVFOnly, BoUpSLP &R)
template<bool IsCompatibility>
static bool compareCmp (Value *V, Value *V2, TargetLibraryInfo &TLI, const DominatorTree &DT)
 Compare two cmp instructions.

Variables

static cl::opt< bool > RunSLPVectorization ("vectorize-slp", cl::init(true), cl::Hidden, cl::desc("Run the SLP vectorization passes"))
static cl::opt< bool > SLPReVec ("slp-revec", cl::init(false), cl::Hidden, cl::desc("Enable vectorization for wider vector utilization"))
static cl::opt< int > SLPCostThreshold ("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " "number "))
static cl::opt< bool > SLPSkipEarlyProfitabilityCheck ("slp-skip-early-profitability-check", cl::init(false), cl::Hidden, cl::desc("When true, SLP vectorizer bypasses profitability checks based on " "heuristics and makes vectorization decision via cost modeling."))
static cl::opt< bool > ShouldVectorizeHor ("slp-vectorize-hor", cl::init(true), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions"))
static cl::opt< bool > ShouldStartVectorizeHorAtStore ("slp-vectorize-hor-store", cl::init(false), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions feeding into a store"))
static cl::opt< bool > SplitAlternateInstructions ("slp-split-alternate-instructions", cl::init(true), cl::Hidden, cl::desc("Improve the code quality by splitting alternate instructions"))
static cl::opt< int > MaxVectorRegSizeOption ("slp-max-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits"))
static cl::opt< unsigned > MaxVFOption ("slp-max-vf", cl::init(0), cl::Hidden, cl::desc("Maximum SLP vectorization factor (0=unlimited)"))
static cl::opt< int > ScheduleRegionSizeBudget ("slp-schedule-budget", cl::init(100000), cl::Hidden, cl::desc("Limit the size of the SLP scheduling region per block"))
 Limits the size of scheduling regions in a block.
static cl::opt< int > MinVectorRegSizeOption ("slp-min-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits"))
static cl::opt< unsigned > RecursionMaxDepth ("slp-recursion-max-depth", cl::init(12), cl::Hidden, cl::desc("Limit the recursion depth when building a vectorizable tree"))
static cl::opt< unsigned > MinTreeSize ("slp-min-tree-size", cl::init(3), cl::Hidden, cl::desc("Only vectorize small trees if they are fully vectorizable"))
static cl::opt< int > LookAheadMaxDepth ("slp-max-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for operand reordering scores"))
static cl::opt< int > RootLookAheadMaxDepth ("slp-max-root-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for searching best rooting option"))
static cl::opt< unsigned > MinProfitableStridedLoads ("slp-min-strided-loads", cl::init(2), cl::Hidden, cl::desc("The minimum number of loads, which should be considered strided, " "if the stride is > 1 or is runtime value"))
static cl::opt< unsigned > MaxProfitableLoadStride ("slp-max-stride", cl::init(8), cl::Hidden, cl::desc("The maximum stride, considered to be profitable."))
static cl::opt< bool > DisableTreeReorder ("slp-disable-tree-reorder", cl::init(false), cl::Hidden, cl::desc("Disable tree reordering even if it is " "profitable. Used for testing only."))
static cl::opt< bool > ForceStridedLoads ("slp-force-strided-loads", cl::init(false), cl::Hidden, cl::desc("Generate strided loads even if they are not " "profitable. Used for testing only."))
static cl::opt< bool > ViewSLPTree ("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz"))
static cl::opt< bool > VectorizeNonPowerOf2 ("slp-vectorize-non-power-of-2", cl::init(false), cl::Hidden, cl::desc("Try to vectorize with non-power-of-2 number of elements."))
static cl::opt< bool > VectorizeCopyableElements ("slp-copyable-elements", cl::init(true), cl::Hidden, cl::desc("Try to replace values with the idempotent instructions for " "better vectorization."))
 Enables vectorization of copyable elements.
static const unsigned AliasedCheckLimit = 10
static constexpr int UsesLimit = 64
static const unsigned MaxMemDepDistance = 160
static const int MinScheduleRegionSize = 16
 If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling regions to be handled.
static const unsigned MaxPHINumOperands = 128
 Maximum allowed number of operands in the PHI nodes.

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "SLP"

Definition at line 111 of file SLPVectorizer.cpp.

◆ SV_NAME

#define SV_NAME   "slp-vectorizer"

Definition at line 110 of file SLPVectorizer.cpp.

Function Documentation

◆ addMask()

void addMask ( SmallVectorImpl< int > & Mask,
ArrayRef< int > SubMask,
bool ExtendingManyInputs = false )
static

Shuffles Mask in accordance with the given SubMask.

Parameters
ExtendingManyInputs: Supports reshuffling of the mask with not only one but two input vectors.

Definition at line 1725 of file SLPVectorizer.cpp.

References assert(), llvm::ArrayRef< T >::begin(), E(), llvm::ArrayRef< T >::empty(), llvm::ArrayRef< T >::end(), I, llvm::PoisonMaskElem, and llvm::ArrayRef< T >::size().

Referenced by llvm::slpvectorizer::BoUpSLP::getReorderingData(), and llvm::slpvectorizer::BoUpSLP::reorderTopToBottom().

◆ allConstant()

bool allConstant ( ArrayRef< Value * > VL)
static

◆ allSameBlock()

◆ allSameOpcode()

bool allSameOpcode ( ArrayRef< Value * > VL)
static
Returns
true if all of the values in VL use the same opcode. For comparison instructions, also checks if predicates match. PoisonValues are considered matching. Interchangeable instructions are not considered.

Definition at line 655 of file SLPVectorizer.cpp.

References llvm::CmpInst::BAD_ICMP_PREDICATE, llvm::cast(), llvm::ArrayRef< T >::end(), llvm::find_if(), llvm::Instruction::getOpcode(), llvm::isa(), and llvm::IsaPred.

Referenced by getSameOpcode().

◆ allSameType()

bool allSameType ( ArrayRef< Value * > VL)
static
Returns
true if all of the values in VL have the same type or false otherwise.

Definition at line 1667 of file SLPVectorizer.cpp.

References llvm::all_of(), and llvm::ArrayRef< T >::consume_front().

Referenced by llvm::slpvectorizer::BoUpSLP::buildTree(), llvm::slpvectorizer::BoUpSLP::buildTree(), and llvm::slpvectorizer::BoUpSLP::getReorderingData().

◆ areCompatibleCmpOps()

bool areCompatibleCmpOps ( Value * BaseOp0,
Value * BaseOp1,
Value * Op0,
Value * Op1,
const TargetLibraryInfo & TLI )
static

Checks if the provided operands of 2 cmp instructions are compatible, i.e. compatible instructions or constants, or just some other regular values.

Definition at line 1437 of file SLPVectorizer.cpp.

References getSameOpcode(), llvm::isa(), and isConstant().

Referenced by isCmpSameOrSwapped().

◆ arePointersCompatible()

bool arePointersCompatible ( Value * Ptr1,
Value * Ptr2,
const TargetLibraryInfo & TLI,
bool CompareOpcodes = true )
static

◆ areTwoInsertFromSameBuildVector()

◆ buildCompressMask()

bool buildCompressMask ( ArrayRef< Value * > PointerOps,
ArrayRef< unsigned > Order,
Type * ScalarTy,
const DataLayout & DL,
ScalarEvolution & SE,
SmallVectorImpl< int > & CompressMask )
static

Builds compress-like mask for shuffles for the given PointerOps, ordered with Order.

Returns
true if the mask represents strided access, false - otherwise.

Definition at line 6629 of file SLPVectorizer.cpp.

References llvm::SmallVectorImpl< T >::assign(), DL, llvm::ArrayRef< T >::empty(), llvm::ArrayRef< T >::front(), llvm::getPointersDiff(), I, llvm::PoisonMaskElem, Ptr, llvm::seq(), and llvm::ArrayRef< T >::size().

Referenced by isMaskedLoadCompress().

◆ buildIntrinsicArgTypes()

SmallVector< Type * > buildIntrinsicArgTypes ( const CallInst * CI,
const Intrinsic::ID ID,
const unsigned VF,
unsigned MinBW,
const TargetTransformInfo * TTI )
static

Builds the arguments types vector for the given call instruction with the given ID for the specified vector factor.

Definition at line 9786 of file SLPVectorizer.cpp.

References llvm::CallBase::args(), llvm::enumerate(), llvm::IntegerType::get(), llvm::Value::getContext(), getWidenedType(), llvm::isVectorIntrinsicWithScalarOpAtArg(), llvm::Intrinsic::not_intrinsic, and llvm::SmallVectorTemplateBase< T, bool >::push_back().

◆ buildUseMask()

SmallBitVector buildUseMask ( int VF,
ArrayRef< int > Mask,
UseMask MaskArg )
static

Prepares a use bitset for the given mask either for the first argument or for the second.

Definition at line 692 of file SLPVectorizer.cpp.

References llvm::enumerate(), and llvm::PoisonMaskElem.

Referenced by performExtractsShuffleAction().

◆ calculateRtStride()

◆ calculateShufflevectorMask()

SmallVector< int > calculateShufflevectorMask ( ArrayRef< Value * > VL)
static
Returns
a shufflevector mask which is used to vectorize shufflevectors, e.g.,
  %5 = shufflevector <8 x i16> %3, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = shufflevector <8 x i16> %3, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %7 = shufflevector <8 x i16> %4, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %8 = shufflevector <8 x i16> %4, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
the result is <0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 28, 29, 30, 31>

Definition at line 412 of file SLPVectorizer.cpp.

References assert(), llvm::cast(), llvm::ArrayRef< T >::front(), getShufflevectorNumGroups(), and llvm::PoisonMaskElem.

◆ canConvertToFMA()

◆ checkTreeSizes()

bool checkTreeSizes ( ArrayRef< std::pair< unsigned, unsigned > > Sizes,
bool First )
static

Checks if the quadratic mean deviation is less than 90% of the mean size.

Definition at line 22747 of file SLPVectorizer.cpp.

References llvm::First.

◆ clusterSortPtrAccesses()

◆ combineOrders()

◆ compareCmp()

template<bool IsCompatibility>
bool compareCmp ( Value * V,
Value * V2,
TargetLibraryInfo & TLI,
const DominatorTree & DT )
static

Compare two cmp instructions.

If IsCompatibility is true, function returns true if 2 cmps have same/swapped predicates and most compatible corresponding operands. If IsCompatibility is false, function implements strict weak ordering relation between two cmp instructions, returning true if the first instruction is "less" than the second, i.e. its predicate is less than the predicate of the second or the operands IDs are less than the operands IDs of the second cmp instruction.

Definition at line 25809 of file SLPVectorizer.cpp.

References assert(), llvm::cast(), llvm::dyn_cast(), E(), llvm::DomTreeNodeBase< NodeT >::getDFSNumIn(), llvm::DominatorTreeBase< NodeT, IsPostDom >::getNode(), llvm::User::getOperand(), getSameOpcode(), llvm::Type::getScalarSizeInBits(), llvm::CmpInst::getSwappedPredicate(), llvm::Value::getType(), llvm::Type::getTypeID(), llvm::Value::getValueID(), I, and isValidElementType().

◆ computeCommonAlignment()

template<typename T>
Align computeCommonAlignment ( ArrayRef< Value * > VL)
static

Calculates minimal alignment as a common alignment.

Definition at line 6353 of file SLPVectorizer.cpp.

References llvm::cast(), llvm::ArrayRef< T >::consume_front(), and llvm::getAlign().

Referenced by llvm::slpvectorizer::BoUpSLP::canVectorizeLoads().

◆ createExtractVector()

Value * createExtractVector ( IRBuilderBase & Builder,
Value * Vec,
unsigned SubVecVF,
unsigned Index )
static

Generates subvector extract using Generator or using default shuffle.

Definition at line 6619 of file SLPVectorizer.cpp.

References llvm::PoisonMaskElem.

Referenced by llvm::slpvectorizer::BoUpSLP::vectorizeTree().

◆ createInsertVector()

Value * createInsertVector ( IRBuilderBase & Builder,
Value * Vec,
Value * V,
unsigned Index,
function_ref< Value *(Value *, Value *, ArrayRef< int >)> Generator = {} )
static

Creates subvector insert.

Generates shuffle using Generator or using default shuffle.

Definition at line 6589 of file SLPVectorizer.cpp.

◆ DEBUG_COUNTER()

DEBUG_COUNTER ( VectorizedGraphs ,
"slp-vectorized" ,
"Controls which SLP graphs should be vectorized."  )

◆ doesInTreeUserNeedToExtract()

bool doesInTreeUserNeedToExtract ( Value * Scalar,
Instruction * UserInst,
TargetLibraryInfo * TLI,
const TargetTransformInfo * TTI )
static
Returns
True if in-tree use also needs extract. This refers to possible scalar operand in vectorized instruction.

Definition at line 1674 of file SLPVectorizer.cpp.

References llvm::any_of(), llvm::CallBase::args(), llvm::cast(), llvm::enumerate(), llvm::Instruction::getOpcode(), llvm::LoadInst::getPointerOperand(), and llvm::getVectorIntrinsicIDForCall().

Referenced by llvm::slpvectorizer::BoUpSLP::buildExternalUses().

◆ dumpOrder()

LLVM_DUMP_METHOD void dumpOrder ( const BoUpSLP::OrdersType & Order)
static

Definition at line 8908 of file SLPVectorizer.cpp.

References llvm::dbgs(), and LLVM_DUMP_METHOD.

◆ findBuildAggregate()

bool findBuildAggregate ( Instruction * LastInsertInst,
TargetTransformInfo * TTI,
SmallVectorImpl< Value * > & BuildVectorOpds,
SmallVectorImpl< Value * > & InsertElts,
const BoUpSLP & R )
static

Recognize construction of vectors like ra = insertelement <4 x float> poison, float s0, i32 0 rb = insertelement <4 x float> ra, float s1, i32 1 rc = insertelement <4 x float> rb, float s2, i32 2 rd = insertelement <4 x float> rc, float s3, i32 3 starting from the last insertelement or insertvalue instruction.

Also recognize homogeneous aggregates like {<2 x float>, <2 x float>}, {{float, float}, {float, float}}, [2 x {float, float}] and so on. See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples.

Assume LastInsertInst is of InsertElementInst or InsertValueInst type.

Returns
true if it matches.

Definition at line 25282 of file SLPVectorizer.cpp.

References assert(), llvm::SmallVectorTemplateCommon< T, typename >::empty(), llvm::erase(), findBuildAggregateRec(), getAggregateSize(), llvm::isa(), llvm::SmallVectorImpl< T >::resize(), and llvm::SmallVectorTemplateCommon< T, typename >::size().

◆ findBuildAggregateRec()

void findBuildAggregateRec ( Instruction * LastInsertInst,
TargetTransformInfo * TTI,
SmallVectorImpl< Value * > & BuildVectorOpds,
SmallVectorImpl< Value * > & InsertElts,
unsigned OperandOffset,
const BoUpSLP & R )
static

◆ findInstructionWithOpcode()

Instruction * findInstructionWithOpcode ( ArrayRef< Value * > VL,
unsigned Opcode )
static

Find an instruction with a specific opcode in VL.

Parameters
VL: Array of values to search through. Must contain only Instructions and PoisonValues.
Opcode: The instruction opcode to search for.
Returns
  • The first instruction found with matching opcode
  • nullptr if no matching instruction is found

Definition at line 1422 of file SLPVectorizer.cpp.

References assert(), llvm::cast(), and llvm::isa().

Referenced by getSameOpcode().

◆ fixupOrderingIndices()

void fixupOrderingIndices ( MutableArrayRef< unsigned > Order)
static

Order may have elements assigned special value (size) which is out of bounds.

Such indices only appear at positions that correspond to undef values (see canReuseExtract for details) and are used to prevent undef values from affecting operand ordering. The first loop below simply finds all unused indices and then the next loop nest assigns these indices to the undef value positions. As an example below, Order has two undef positions and they have been assigned the values 3 and 7 respectively: before: 6 9 5 4 9 2 1 0 after: 6 3 5 4 7 2 1 0

Definition at line 1760 of file SLPVectorizer.cpp.

References assert(), llvm::SmallBitVector::count(), llvm::SmallBitVector::find_first(), llvm::SmallBitVector::find_next(), I, llvm::SmallBitVector::none(), llvm::SmallBitVector::reset(), llvm::SmallBitVector::set(), and llvm::ArrayRef< T >::size().

Referenced by llvm::slpvectorizer::BoUpSLP::getReorderingData(), llvm::slpvectorizer::BoUpSLP::reorderBottomToTop(), reorderOrder(), and llvm::slpvectorizer::BoUpSLP::reorderTopToBottom().

◆ gatherPossiblyVectorizableLoads()

◆ generateKeySubkey()

std::pair< size_t, size_t > generateKeySubkey ( Value * V,
const TargetLibraryInfo * TLI,
function_ref< hash_code(size_t, LoadInst *)> LoadsSubkeyGenerator,
bool AllowAlternate )
static

Generates key/subkey pair for the given value to provide effective sorting of the values and better detection of the vectorizable values sequences.

The keys/subkeys can be used for better sorting of the values themselves (keys) and in values subgroups (subkeys).

Definition at line 9581 of file SLPVectorizer.cpp.

References llvm::SmallBitVector::all(), Call, llvm::cast(), llvm::dyn_cast(), generateKeySubkey(), llvm::CmpInst::getInversePredicate(), llvm::VFDatabase::getMappings(), llvm::User::getOperand(), llvm::CmpInst::getSwappedPredicate(), llvm::Value::getType(), llvm::getVectorIntrinsicIDForCall(), llvm::hash_combine(), llvm::hash_value(), I, llvm::InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key, llvm::isa(), llvm::Instruction::isIntDivRem(), llvm::isTriviallyVectorizable(), isUndefVector(), and isVectorLikeInstWithConstOps().

Referenced by generateKeySubkey().

◆ getAggregateSize()

std::optional< unsigned > getAggregateSize ( Instruction * InsertInst)
static

Definition at line 25215 of file SLPVectorizer.cpp.

References llvm::cast(), llvm::dyn_cast(), llvm::Type::isSingleValueType(), and IV.

Referenced by findBuildAggregate().

◆ getAltInstrMask()

SmallBitVector getAltInstrMask ( ArrayRef< Value * > VL,
Type * ScalarTy,
unsigned Opcode0,
unsigned Opcode1 )
static
Returns
a bitset for selecting opcodes. false for Opcode0 and true for Opcode1.

Definition at line 1786 of file SLPVectorizer.cpp.

References llvm::cast(), getNumElements(), getOpcode(), llvm::isa(), llvm::seq(), llvm::SmallBitVector::set(), and llvm::ArrayRef< T >::size().

Referenced by llvm::slpvectorizer::BoUpSLP::reorderTopToBottom().

◆ getDebugLocFromPHI()

DebugLoc getDebugLocFromPHI ( PHINode & PN)
static

◆ getElementIndex()

std::optional< unsigned > getElementIndex ( const Value * Inst,
unsigned Offset = 0 )
static
Returns
inserting or extracting index of InsertElement, ExtractElement or InsertValue instruction, using Offset as base offset for index.
std::nullopt if the index is not an immediate.

Definition at line 622 of file SLPVectorizer.cpp.

References llvm::dyn_cast(), getInsertExtractIndex(), I, IV, and llvm::Offset.

◆ getExtractIndex()

std::optional< unsigned > getExtractIndex ( const Instruction * E)
static
Returns
True if Extract{Value,Element} instruction extracts element Idx.

Definition at line 870 of file SLPVectorizer.cpp.

References assert(), llvm::cast(), llvm::dyn_cast(), and E().

Referenced by llvm::slpvectorizer::BoUpSLP::getReorderingData().

◆ getExtractWithExtendCost()

InstructionCost getExtractWithExtendCost ( const TargetTransformInfo & TTI,
unsigned Opcode,
Type * Dst,
VectorType * VecTy,
unsigned Index,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput )
static

◆ getFloorFullVectorNumberOfElements()

unsigned getFloorFullVectorNumberOfElements ( const TargetTransformInfo & TTI,
Type * Ty,
unsigned Sz )
static

Returns the number of elements of the given type Ty, not greater than Sz, which forms type, which splits by TTI into whole vector types during legalization.

Definition at line 307 of file SLPVectorizer.cpp.

References llvm::bit_ceil(), llvm::bit_floor(), llvm::divideCeil(), getWidenedType(), and isValidElementType().

Referenced by llvm::slpvectorizer::BoUpSLP::canVectorizeLoads().

◆ getFullVectorNumberOfElements()

unsigned getFullVectorNumberOfElements ( const TargetTransformInfo & TTI,
Type * Ty,
unsigned Sz )
static

Returns the number of elements of the given type Ty, not less than Sz, which forms type, which splits by TTI into whole vector types during legalization.

Definition at line 292 of file SLPVectorizer.cpp.

References llvm::bit_ceil(), llvm::divideCeil(), getWidenedType(), and isValidElementType().

Referenced by isMaskedLoadCompress(), and tryToFindDuplicates().

◆ getGEPCosts()

◆ getInsertExtractIndex()

template<typename T>
std::optional< unsigned > getInsertExtractIndex ( const Value * Inst,
unsigned Offset )
static

Definition at line 597 of file SLPVectorizer.cpp.

References llvm::dyn_cast(), and llvm::Offset.

Referenced by getElementIndex().

◆ getLocation()

MemoryLocation getLocation ( Instruction * I)
static
Returns
the AA location that is being accessed by the instruction.

Definition at line 1703 of file SLPVectorizer.cpp.

References llvm::dyn_cast(), llvm::MemoryLocation::get(), and I.

◆ getMainAltOpsNoStateVL()

std::pair< Instruction *, Instruction * > getMainAltOpsNoStateVL ( ArrayRef< Value * > VL)
static

Returns main/alternate instructions for the given VL.

Unlike getSameOpcode supports non-compatible instructions for better SplitVectorize node support.

Returns
the first main/alt instructions, if VL contains only poisons and instructions with only 2 opcodes. Returns a pair of nullptr otherwise.

Definition at line 10349 of file SLPVectorizer.cpp.

References assert(), llvm::dyn_cast(), llvm::Instruction::getOpcode(), llvm::ilist_detail::node_parent_access< NodeTy, ParentTy >::getParent(), I, and llvm::isa().

◆ getNonPhiOperand()

Instruction * getNonPhiOperand ( Instruction * I,
PHINode * Phi )
static

Returns the first operand of I that does not match Phi.

If operand is not an instruction it returns nullptr.

Definition at line 25405 of file SLPVectorizer.cpp.

References llvm::dyn_cast(), I, and matchRdxBop().

◆ getNumberOfPotentiallyCommutativeOps()

unsigned getNumberOfPotentiallyCommutativeOps ( Instruction * I)
static
Returns
number of operands of I, considering commutativity. Returns 2 for commutative intrinsics.
Parameters
I: The instruction to check for commutativity

Definition at line 586 of file SLPVectorizer.cpp.

References I, llvm::isa(), and isCommutative().

◆ getNumElements()

◆ getNumElems()

unsigned getNumElems ( unsigned Size,
unsigned PartNumElems,
unsigned Part )
static

Returns correct remaining number of elements, considering total amount Size, (power-of-2 number) of elements in a single register PartNumElems and current register (part) Part.

Definition at line 463 of file SLPVectorizer.cpp.

References Size.

Referenced by llvm::slpvectorizer::BoUpSLP::findReusedOrderedScalars().

◆ getPartNumElems()

unsigned getPartNumElems ( unsigned Size,
unsigned NumParts )
static

Returns power-of-2 number of elements in a single register (part), given the total number of elements Size and number of registers (parts) NumParts.

Definition at line 456 of file SLPVectorizer.cpp.

References llvm::bit_ceil(), llvm::divideCeil(), and Size.

Referenced by llvm::slpvectorizer::BoUpSLP::ShuffleCostEstimator::add(), llvm::slpvectorizer::BoUpSLP::ShuffleCostEstimator::add(), and llvm::slpvectorizer::BoUpSLP::findReusedOrderedScalars().

◆ getRdxKind()

RecurKind getRdxKind ( Value * V)
static

Gets recurrence kind from the specified value.

Definition at line 25212 of file SLPVectorizer.cpp.

◆ getReductionInstr()

Instruction * getReductionInstr ( const DominatorTree * DT,
PHINode * P,
BasicBlock * ParentBB,
LoopInfo * LI )
static

Try and get a reduction instruction from a phi node.

Given a phi node P in a block ParentBB, consider possible reductions if they come from either ParentBB or a containing loop latch.

Returns
A candidate reduction value if possible, or nullptr if not possible.

Definition at line 25317 of file SLPVectorizer.cpp.

References llvm::cast(), llvm::DominatorTree::dominates(), llvm::dyn_cast(), llvm::LoopInfoBase< BlockT, LoopT >::getLoopFor(), llvm::LoopBase< BlockT, LoopT >::getLoopLatch(), llvm::isa(), and P.

◆ getSameOpcode()

InstructionsState getSameOpcode ( ArrayRef< Value * > VL,
const TargetLibraryInfo & TLI )
static
Returns
analysis of the Instructions in VL described in InstructionsState, the Opcode that we suppose the whole list could be vectorized even if its structure is diverse.

Definition at line 1473 of file SLPVectorizer.cpp.

References llvm::all_of(), allSameOpcode(), assert(), llvm::CmpInst::BAD_ICMP_PREDICATE, Call, llvm::cast(), llvm::SetVector< T, Vector, Set, N >::contains(), llvm::dyn_cast(), llvm::SmallVectorTemplateCommon< T, typename >::empty(), llvm::ArrayRef< T >::end(), llvm::find_if(), findInstructionWithOpcode(), llvm::SmallVectorTemplateCommon< T, typename >::front(), llvm::CallBase::getBundleOperandsStartIndex(), llvm::CallBase::getCalledFunction(), llvm::VFDatabase::getMappings(), llvm::Instruction::getOpcode(), llvm::User::getOperand(), llvm::CmpInst::getSwappedPredicate(), llvm::Value::getType(), llvm::getVectorIntrinsicIDForCall(), llvm::CallBase::hasOperandBundles(), I, llvm::SetVector< T, Vector, Set, N >::insert(), llvm::isa(), llvm::IsaPred, isCmpSameOrSwapped(), llvm::isTriviallyVectorizable(), isVectorLikeInstWithConstOps(), llvm::iterator_range(), llvm::User::op_begin(), llvm::ArrayRef< T >::size(), llvm::SetVector< T, Vector, Set, N >::size(), and llvm::SmallVectorTemplateCommon< T, typename >::size().

Referenced by areCompatibleCmpOps(), arePointersCompatible(), canConvertToFMA(), compareCmp(), llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics::getShallowScore(), llvm::slpvectorizer::BoUpSLP::isTreeNotExtendable(), llvm::slpvectorizer::BoUpSLP::VLOperands::reorder(), and tryToFindDuplicates().

◆ getScalarizationOverhead()

InstructionCost getScalarizationOverhead ( const TargetTransformInfo & TTI,
Type * ScalarTy,
VectorType * Ty,
const APInt & DemandedElts,
bool Insert,
bool Extract,
TTI::TargetCostKind CostKind,
bool ForPoisonSrc = true,
ArrayRef< Value * > VL = {} )
static

This is similar to TargetTransformInfo::getScalarizationOverhead, but if ScalarTy is a FixedVectorType, a vector will be inserted or extracted instead of a scalar.

Definition at line 6519 of file SLPVectorizer.cpp.

References CostKind.

Referenced by llvm::slpvectorizer::BoUpSLP::canVectorizeLoads(), and isMaskedLoadCompress().

◆ getShuffleCost()

InstructionCost getShuffleCost ( const TargetTransformInfo & TTI,
TTI::ShuffleKind Kind,
VectorType * Tp,
ArrayRef< int > Mask = {},
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
int Index = 0,
VectorType * SubTp = nullptr,
ArrayRef< const Value * > Args = {} )
static

Returns the cost of the shuffle instructions with the given Kind, vector type Tp and optional Mask.

Adds SLP-specific cost estimation for insert subvector pattern.

Definition at line 6490 of file SLPVectorizer.cpp.

Referenced by llvm::slpvectorizer::BoUpSLP::canVectorizeLoads(), getExtractWithExtendCost(), llvm::slpvectorizer::BoUpSLP::getReorderingData(), llvm::slpvectorizer::BoUpSLP::getTreeCost(), getVectorInstrCost(), and isMaskedLoadCompress().

◆ getShufflevectorNumGroups()

unsigned getShufflevectorNumGroups ( ArrayRef< Value * > VL)
static
Returns
the number of groups of shufflevector. A group has the following features:
  1. All of value in a group are shufflevector.
  2. The mask of all shufflevector is isExtractSubvectorMask.
  3. The mask of all shufflevector uses all of the elements of the source. e.g., it is 1 group (%0) %1 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> %2 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> it is 2 groups (%3 and %4) %5 = shufflevector <8 x i16> %3, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %6 = shufflevector <8 x i16> %3, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %7 = shufflevector <8 x i16> %4, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %8 = shufflevector <8 x i16> %4, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> it is 0 group %12 = shufflevector <8 x i16> %10, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %13 = shufflevector <8 x i16> %11, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>

Definition at line 360 of file SLPVectorizer.cpp.

References llvm::SmallBitVector::all(), llvm::all_of(), assert(), llvm::cast(), E(), llvm::ArrayRef< T >::empty(), llvm::ArrayRef< T >::front(), I, llvm::IsaPred, llvm::SmallBitVector::set(), llvm::ArrayRef< T >::size(), and llvm::ArrayRef< T >::slice().

Referenced by calculateShufflevectorMask().

◆ getValueType()

◆ getVectorCallCosts()

std::pair< InstructionCost, InstructionCost > getVectorCallCosts ( CallInst * CI,
FixedVectorType * VecTy,
TargetTransformInfo * TTI,
TargetLibraryInfo * TLI,
ArrayRef< Type * > ArgTys )
static

Calculates the costs of vectorized intrinsic (if possible) and vectorized function (if possible) calls.

Returns invalid cost for the corresponding calls, if they cannot be vectorized/will be scalarized.

Definition at line 9811 of file SLPVectorizer.cpp.

References llvm::dyn_cast(), llvm::VFShape::get(), llvm::ElementCount::getFixed(), llvm::CallBase::getFunctionType(), llvm::InstructionCost::getInvalid(), llvm::FixedVectorType::getNumElements(), llvm::getVectorIntrinsicIDForCall(), llvm::VFDatabase::getVectorizedFunction(), IntrinsicCost, llvm::CallBase::isNoBuiltin(), and llvm::TargetTransformInfo::TCK_RecipThroughput.

◆ getVectorInstrCost()

InstructionCost getVectorInstrCost ( const TargetTransformInfo & TTI,
Type * ScalarTy,
unsigned Opcode,
Type * Val,
TTI::TargetCostKind CostKind,
unsigned Index,
Value * Scalar,
ArrayRef< std::tuple< Value *, User *, int > > ScalarUserAndIdx )
static

This is similar to TargetTransformInfo::getVectorInstrCost, but if ScalarTy is a FixedVectorType, a vector will be extracted instead of a scalar.

Definition at line 6552 of file SLPVectorizer.cpp.

References assert(), llvm::cast(), CostKind, llvm::dyn_cast(), getShuffleCost(), llvm::isa(), llvm::TargetTransformInfo::SK_ExtractSubvector, and SLPReVec.

Referenced by llvm::slpvectorizer::BoUpSLP::getTreeCost().

◆ getWidenedType()

◆ isAlternateInstruction()

bool isAlternateInstruction ( Instruction * I,
Instruction * MainOp,
Instruction * AltOp,
const TargetLibraryInfo & TLI )
static

Checks if the specified instruction I is an alternate operation for the given MainOp and AltOp instructions.

Definition at line 12032 of file SLPVectorizer.cpp.

References assert(), llvm::cast(), llvm::dyn_cast(), llvm::CmpInst::getSwappedPredicate(), I, isCmpSameOrSwapped(), and P.

Referenced by llvm::slpvectorizer::BoUpSLP::getReorderingData().

◆ isCmpSameOrSwapped()

bool isCmpSameOrSwapped ( const CmpInst * BaseCI,
const CmpInst * CI,
const TargetLibraryInfo & TLI )
static
Returns
true if a compare instruction CI has similar "look" and same predicate as BaseCI, "as is" or with its operands and predicate swapped, false otherwise.

Definition at line 1451 of file SLPVectorizer.cpp.

References areCompatibleCmpOps(), assert(), llvm::User::getOperand(), llvm::CmpInst::getPredicate(), llvm::CmpInst::getSwappedPredicate(), and llvm::Value::getType().

Referenced by getSameOpcode(), and isAlternateInstruction().

◆ isCommutative() [1/2]

bool isCommutative ( Instruction * I)
static

This is a helper function to check whether I is commutative.

This is a convenience wrapper that calls the two-parameter version of isCommutative with the same instruction for both parameters. This is the common case where the instruction being checked for commutativity is the same as the instruction whose uses are analyzed for special patterns (see the two-parameter version above for details).

Parameters
I: The instruction to check for commutativity
Returns
true if the instruction is commutative, false otherwise

Definition at line 581 of file SLPVectorizer.cpp.

References I, and isCommutative().

◆ isCommutative() [2/2]

bool isCommutative ( Instruction * I,
Value * ValWithUses )
static
Returns
True if I is commutative, handles CmpInst and BinaryOperator. For BinaryOperator, it also checks if ValWithUses is used in specific patterns that make it effectively commutative (like equality comparisons with zero). In most cases, users should not call this function directly (since I and ValWithUses are the same). However, when analyzing interchangeable instructions, we need to use the converted opcode along with the original uses.
Parameters
I: The instruction to check for commutativity
ValWithUses: The value whose uses are analyzed for special patterns

Definition at line 540 of file SLPVectorizer.cpp.

References llvm::all_of(), llvm::dyn_cast(), llvm::Value::hasNUsesOrMore(), I, llvm::Value::uses(), and UsesLimit.

Referenced by getNumberOfPotentiallyCommutativeOps(), llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics::getScoreAtLevelRec(), isCommutative(), llvm::slpvectorizer::BoUpSLP::isProfitableToReorder(), and llvm::FastISel::selectBinaryOp().

◆ isConstant()

bool isConstant ( Value * V)
static
Returns
True if the value is a constant (but not globals/constant expressions).

Definition at line 431 of file SLPVectorizer.cpp.

References llvm::isa().

Referenced by areCompatibleCmpOps(), and isVectorLikeInstWithConstOps().

◆ isFirstInsertElement()

bool isFirstInsertElement ( const InsertElementInst * IE1,
const InsertElementInst * IE2 )
static

Checks if the IE1 instruction is followed by the IE2 instruction in the buildvector sequence.

Definition at line 15749 of file SLPVectorizer.cpp.

References llvm::dyn_cast(), getElementIndex(), llvm::Value::hasOneUse(), and llvm_unreachable.

Referenced by llvm::slpvectorizer::BoUpSLP::getTreeCost().

◆ isFixedVectorShuffle()

std::optional< TargetTransformInfo::ShuffleKind > isFixedVectorShuffle ( ArrayRef< Value * > VL,
SmallVectorImpl< int > & Mask,
AssumptionCache * AC )
static

Checks if the vector of instructions can be represented as a shuffle, like: x0 = extractelement <4 x i8> x, i32 0 x3 = extractelement <4 x i8> x, i32 3 y1 = extractelement <4 x i8> y, i32 1 y2 = extractelement <4 x i8> y, i32 2 x0x0 = mul i8 x0, x0 x3x3 = mul i8 x3, x3 y1y1 = mul i8 y1, y1 y2y2 = mul i8 y2, y2 ins1 = insertelement <4 x i8> poison, i8 x0x0, i32 0 ins2 = insertelement <4 x i8> ins1, i8 x3x3, i32 1 ins3 = insertelement <4 x i8> ins2, i8 y1y1, i32 2 ins4 = insertelement <4 x i8> ins3, i8 y2y2, i32 3 ret <4 x i8> ins4 can be transformed into: %1 = shufflevector <4 x i8> x, <4 x i8> y, <4 x i32> <i32 0, i32 3, i32 5, i32 6> %2 = mul <4 x i8> %1, %1 ret <4 x i8> %2 Mask will return the Shuffle Mask equivalent to the extracted elements.

TODO: Can we split off and reuse the shuffle mask detection from ShuffleVectorInst/getShuffleCost?

Definition at line 782 of file SLPVectorizer.cpp.

References llvm::any_of(), llvm::ArrayRef< T >::begin(), llvm::cast(), llvm::dyn_cast(), E(), llvm::ArrayRef< T >::end(), llvm::find_if(), I, llvm::isa(), llvm::IsaPred, llvm::isGuaranteedNotToBePoison(), isUndefVector(), llvm::PoisonMaskElem, Select, Size, llvm::ArrayRef< T >::size(), llvm::TargetTransformInfo::SK_PermuteSingleSrc, llvm::TargetTransformInfo::SK_PermuteTwoSrc, llvm::TargetTransformInfo::SK_Select, and Unknown.

◆ isLoadCombineCandidateImpl()

◆ isMainInstruction()

bool isMainInstruction ( Instruction * I,
Instruction * MainOp,
Instruction * AltOp,
const TargetLibraryInfo & TLI )
static

Checks if the specified instruction I is a main operation for the given MainOp and AltOp instructions.

Definition at line 12026 of file SLPVectorizer.cpp.

References I.

◆ isMaskedLoadCompress() [1/2]

bool isMaskedLoadCompress ( ArrayRef< Value * > VL,
ArrayRef< Value * > PointerOps,
ArrayRef< unsigned > Order,
const TargetTransformInfo & TTI,
const DataLayout & DL,
ScalarEvolution & SE,
AssumptionCache & AC,
const DominatorTree & DT,
const TargetLibraryInfo & TLI,
const function_ref< bool(Value *)> AreAllUsersVectorized )
static

Checks if the VL can be transformed to a (masked)load + compress or (masked) interleaved load.

Definition at line 6793 of file SLPVectorizer.cpp.

References DL, and isMaskedLoadCompress().

◆ isMaskedLoadCompress() [2/2]

bool isMaskedLoadCompress ( ArrayRef< Value * > VL,
ArrayRef< Value * > PointerOps,
ArrayRef< unsigned > Order,
const TargetTransformInfo & TTI,
const DataLayout & DL,
ScalarEvolution & SE,
AssumptionCache & AC,
const DominatorTree & DT,
const TargetLibraryInfo & TLI,
const function_ref< bool(Value *)> AreAllUsersVectorized,
bool & IsMasked,
unsigned & InterleaveFactor,
SmallVectorImpl< int > & CompressMask,
VectorType *& LoadVecTy )
static

◆ isReductionCandidate()

bool isReductionCandidate ( Instruction * I)
static

Returns true if I is a candidate instruction for reduction vectorization.

Definition at line 25414 of file SLPVectorizer.cpp.

References I, IsSelect(), llvm::PatternMatch::m_Select(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::match(), and matchRdxBop().

◆ isRepeatedNonIdentityClusteredMask()

bool isRepeatedNonIdentityClusteredMask ( ArrayRef< int > Mask,
unsigned Sz )
static

Checks if the given mask is a "clustered" mask with the same clusters of size Sz, which are not identity submasks.

Definition at line 7748 of file SLPVectorizer.cpp.

References E(), I, and llvm::ShuffleVectorInst::isIdentityMask().

◆ isReverseOrder()

◆ isSimple()

◆ isSplat()

bool isSplat ( ArrayRef< Value * > VL)
static
Returns
True if all of the values in VL are identical or some of them are UndefValue.

Definition at line 514 of file SLPVectorizer.cpp.

References llvm::isa().

◆ isUndefVector()

template<bool IsPoisonOnly = false>
SmallBitVector isUndefVector ( const Value * V,
const SmallBitVector & UseMask = {} )
static

Checks if the given value is actually an undefined constant vector.

Also, if the UseMask is not empty, tries to check if the non-masked elements actually mask the insertelement buildvector, if any.

Definition at line 713 of file SLPVectorizer.cpp.

Referenced by generateKeySubkey(), llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics::getShallowScore(), isFixedVectorShuffle(), and performExtractsShuffleAction().

◆ isValidElementType()

bool isValidElementType ( Type * Ty)
static

Predicate for the element types that the SLP vectorizer supports.

The most important thing to filter here are types which are invalid in LLVM vectors. We also filter target specific types which have absolutely no meaningful vectorization path such as x86_fp80 and ppc_f128. This just avoids spending time checking the cost model and realizing that they will be inevitably scalarized.

Definition at line 252 of file SLPVectorizer.cpp.

References llvm::isa(), llvm::VectorType::isValidElementType(), and SLPReVec.

Referenced by llvm::slpvectorizer::BoUpSLP::canMapToVector(), compareCmp(), llvm::slpvectorizer::BoUpSLP::findReusedOrderedScalars(), gatherPossiblyVectorizableLoads(), getFloorFullVectorNumberOfElements(), getFullVectorNumberOfElements(), llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics::getShallowScore(), and llvm::hasFullVectorsOrPowerOf2().

◆ isVectorLikeInstWithConstOps()

bool isVectorLikeInstWithConstOps ( Value * V)
static

Checks if V is one of the vector-like instructions, i.e. undef, insertelement/extractelement with constant indices for fixed vector type or extractvalue instruction.

Definition at line 438 of file SLPVectorizer.cpp.

References assert(), llvm::dyn_cast(), I, llvm::isa(), and isConstant().

Referenced by allSameBlock(), generateKeySubkey(), and getSameOpcode().

◆ matchRdxBop()

◆ performExtractsShuffleAction()

template<typename T>
T * performExtractsShuffleAction ( MutableArrayRef< std::pair< T *, SmallVector< int > > > ShuffleMask,
Value * Base,
function_ref< unsigned(T *)> GetVF,
function_ref< std::pair< T *, bool >(T *, ArrayRef< int >, bool)> ResizeAction,
function_ref< T *(ArrayRef< int >, ArrayRef< T * >)> Action )
static

Does the analysis of the provided shuffle masks and performs the requested actions on the vectors with the given shuffle masks.

It tries to do it in several steps.

  1. If the Base vector is not an undef vector, resize the very first mask to have a common VF and perform the action for 2 input vectors (including the non-undef Base). Other shuffle masks are combined with the result of the first stage and processed as a shuffle of 2 elements.
  2. If the Base is undef vector and have only 1 shuffle mask, perform the action only for 1 vector with the given mask, if it is not the identity mask.
  3. If > 2 masks are used, perform the remaining shuffle actions for 2 vectors, combining the masks properly between the steps.

Definition at line 15804 of file SLPVectorizer.cpp.

References llvm::SmallBitVector::all(), assert(), llvm::sampleprof::Base, buildUseMask(), E(), I, isUndefVector(), llvm::PoisonMaskElem, T, and llvm::SmallBitVector::test().

Referenced by llvm::slpvectorizer::BoUpSLP::getTreeCost().

◆ propagateMetadata()

Instruction * propagateMetadata ( Instruction * Inst,
ArrayRef< Value * > VL )
static
Returns
Inst after propagating metadata from VL only for instructions in VL.

Definition at line 18871 of file SLPVectorizer.cpp.

References llvm::isa(), llvm::propagateMetadata(), and llvm::SmallVectorTemplateBase< T, bool >::push_back().

◆ reorderOrder()

◆ reorderReuses()

void reorderReuses ( SmallVectorImpl< int > & Reuses,
ArrayRef< int > Mask )
static

Reorders the given Reuses mask according to the given Mask.

Reuses contains the original mask for the scalars reused in the node. The procedure transforms this mask in accordance with the given Mask.

Definition at line 6090 of file SLPVectorizer.cpp.

References assert(), llvm::SmallVectorTemplateCommon< T, typename >::begin(), E(), llvm::SmallVectorTemplateCommon< T, typename >::end(), I, llvm::PoisonMaskElem, llvm::SmallVectorTemplateCommon< T, typename >::size(), and llvm::SmallVectorImpl< T >::swap().

Referenced by llvm::slpvectorizer::BoUpSLP::reorderBottomToTop(), and reorderOrder().

◆ replicateMask()

SmallVector< Constant * > replicateMask ( ArrayRef< Constant * > Val,
unsigned VF )
static

◆ shortBundleName()

std::string shortBundleName ( ArrayRef< Value * > VL,
int Idx = -1 )
static

Print a short descriptor of the instruction bundle suitable for debug output.

Definition at line 470 of file SLPVectorizer.cpp.

References llvm::ArrayRef< T >::front(), and llvm::ArrayRef< T >::size().

Referenced by llvm::slpvectorizer::BoUpSLP::getTreeCost().

◆ STATISTIC()

STATISTIC ( NumVectorInstructions ,
"Number of vector instructions generated"  )

◆ transformScalarShuffleIndiciesToVector()

void transformScalarShuffleIndiciesToVector ( unsigned VecTyNumElements,
SmallVectorImpl< int > & Mask )
static

◆ tryGetSecondaryReductionRoot()

Instruction * tryGetSecondaryReductionRoot ( PHINode * Phi,
Instruction * Root )
static

We could have an initial reduction that is not an add.

r *= v1 + v2 + v3 + v4 In such a case start looking for a tree rooted in the first '+'. Returns the new root if found, which may be nullptr if not an instruction.

Definition at line 25387 of file SLPVectorizer.cpp.

References assert(), llvm::dyn_cast(), llvm::User::getOperand(), llvm::isa(), LHS, and RHS.

◆ tryToFindDuplicates()

◆ tryToVectorizeSequence()

Variable Documentation

◆ AliasedCheckLimit

const unsigned AliasedCheckLimit = 10
static

Definition at line 227 of file SLPVectorizer.cpp.

◆ DisableTreeReorder

cl::opt< bool > DisableTreeReorder("slp-disable-tree-reorder", cl::init(false), cl::Hidden, cl::desc("Disable tree reordering even if it is " "profitable. Used for testing only.")) ( "slp-disable-tree-reorder" ,
cl::init(false) ,
cl::Hidden ,
cl::desc("Disable tree reordering even if it is " "profitable. Used for testing only.")  )
static

◆ ForceStridedLoads

cl::opt< bool > ForceStridedLoads("slp-force-strided-loads", cl::init(false), cl::Hidden, cl::desc("Generate strided loads even if they are not " "profitable. Used for testing only.")) ( "slp-force-strided-loads" ,
cl::init(false) ,
cl::Hidden ,
cl::desc("Generate strided loads even if they are not " "profitable. Used for testing only.")  )
static

◆ LookAheadMaxDepth

cl::opt< int > LookAheadMaxDepth("slp-max-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for operand reordering scores")) ( "slp-max-look-ahead-depth" ,
cl::init(2) ,
cl::Hidden ,
cl::desc("The maximum look-ahead depth for operand reordering scores")  )
static

◆ MaxMemDepDistance

const unsigned MaxMemDepDistance = 160
static

Definition at line 236 of file SLPVectorizer.cpp.

◆ MaxPHINumOperands

const unsigned MaxPHINumOperands = 128
static

Maximum allowed number of operands in the PHI nodes.

Definition at line 243 of file SLPVectorizer.cpp.

◆ MaxProfitableLoadStride

cl::opt< unsigned > MaxProfitableLoadStride("slp-max-stride", cl::init(8), cl::Hidden, cl::desc("The maximum stride, considered to be profitable.")) ( "slp-max-stride" ,
cl::init(8) ,
cl::Hidden ,
cl::desc("The maximum stride, considered to be profitable.")  )
static

◆ MaxVectorRegSizeOption

cl::opt< int > MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits")) ( "slp-max-reg-size" ,
cl::init(128) ,
cl::Hidden ,
cl::desc("Attempt to vectorize for this register size in bits")  )
static

◆ MaxVFOption

cl::opt< unsigned > MaxVFOption("slp-max-vf", cl::init(0), cl::Hidden, cl::desc("Maximum SLP vectorization factor (0=unlimited)")) ( "slp-max-vf" ,
cl::init(0) ,
cl::Hidden ,
cl::desc("Maximum SLP vectorization factor (0=unlimited)")  )
static

◆ MinProfitableStridedLoads

cl::opt< unsigned > MinProfitableStridedLoads("slp-min-strided-loads", cl::init(2), cl::Hidden, cl::desc("The minimum number of loads, which should be considered strided, " "if the stride is > 1 or is runtime value")) ( "slp-min-strided-loads" ,
cl::init(2) ,
cl::Hidden ,
cl::desc("The minimum number of loads, which should be considered strided, " "if the stride is > 1 or is runtime value")  )
static

◆ MinScheduleRegionSize

const int MinScheduleRegionSize = 16
static

If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling regions to be handled.

Definition at line 240 of file SLPVectorizer.cpp.

◆ MinTreeSize

cl::opt< unsigned > MinTreeSize("slp-min-tree-size", cl::init(3), cl::Hidden, cl::desc("Only vectorize small trees if they are fully vectorizable")) ( "slp-min-tree-size" ,
cl::init(3) ,
cl::Hidden ,
cl::desc("Only vectorize small trees if they are fully vectorizable")  )
static

◆ MinVectorRegSizeOption

cl::opt< int > MinVectorRegSizeOption("slp-min-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits")) ( "slp-min-reg-size" ,
cl::init(128) ,
cl::Hidden ,
cl::desc("Attempt to vectorize for this register size in bits")  )
static

◆ RecursionMaxDepth

cl::opt< unsigned > RecursionMaxDepth("slp-recursion-max-depth", cl::init(12), cl::Hidden, cl::desc("Limit the recursion depth when building a vectorizable tree")) ( "slp-recursion-max-depth" ,
cl::init(12) ,
cl::Hidden ,
cl::desc("Limit the recursion depth when building a vectorizable tree")  )
static

◆ RootLookAheadMaxDepth

cl::opt< int > RootLookAheadMaxDepth("slp-max-root-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for searching best rooting option")) ( "slp-max-root-look-ahead-depth" ,
cl::init(2) ,
cl::Hidden ,
cl::desc("The maximum look-ahead depth for searching best rooting option")  )
static

◆ RunSLPVectorization

cl::opt< bool > RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden, cl::desc("Run the SLP vectorization passes")) ( "vectorize-slp" ,
cl::init(true) ,
cl::Hidden ,
cl::desc("Run the SLP vectorization passes")  )
static

◆ ScheduleRegionSizeBudget

cl::opt< int > ScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden, cl::desc("Limit the size of the SLP scheduling region per block")) ( "slp-schedule-budget" ,
cl::init(100000) ,
cl::Hidden ,
cl::desc("Limit the size of the SLP scheduling region per block")  )
static

Limits the size of scheduling regions in a block.

It avoids long compile times for very large blocks where vector instructions are spread over a wide range. This limit is way higher than needed by real-world functions.

Referenced by llvm::slpvectorizer::BoUpSLP::getSpillCost().

◆ ShouldStartVectorizeHorAtStore

cl::opt< bool > ShouldStartVectorizeHorAtStore("slp-vectorize-hor-store", cl::init(false), cl::Hidden, cl::desc( "Attempt to vectorize horizontal reductions feeding into a store")) ( "slp-vectorize-hor-store" ,
cl::init(false) ,
cl::Hidden ,
cl::desc( "Attempt to vectorize horizontal reductions feeding into a store")  )
static

◆ ShouldVectorizeHor

cl::opt< bool > ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions")) ( "slp-vectorize-hor" ,
cl::init(true) ,
cl::Hidden ,
cl::desc("Attempt to vectorize horizontal reductions")  )
static

◆ SLPCostThreshold

cl::opt< int > SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " "number ")) ( "slp-threshold" ,
cl::init(0) ,
cl::Hidden ,
cl::desc("Only vectorize if you gain more than this " "number ")  )
static

◆ SLPReVec

cl::opt< bool > SLPReVec("slp-revec", cl::init(false), cl::Hidden, cl::desc("Enable vectorization for wider vector utilization")) ( "slp-revec" ,
cl::init(false) ,
cl::Hidden ,
cl::desc("Enable vectorization for wider vector utilization")  )
static

◆ SLPSkipEarlyProfitabilityCheck

cl::opt< bool > SLPSkipEarlyProfitabilityCheck("slp-skip-early-profitability-check", cl::init(false), cl::Hidden, cl::desc("When true, SLP vectorizer bypasses profitability checks based on " "heuristics and makes vectorization decision via cost modeling.")) ( "slp-skip-early-profitability-check" ,
cl::init(false) ,
cl::Hidden ,
cl::desc("When true, SLP vectorizer bypasses profitability checks based on " "heuristics and makes vectorization decision via cost modeling.")  )
static

◆ SplitAlternateInstructions

cl::opt< bool > SplitAlternateInstructions("slp-split-alternate-instructions", cl::init(true), cl::Hidden, cl::desc("Improve the code quality by splitting alternate instructions")) ( "slp-split-alternate-instructions" ,
cl::init(true) ,
cl::Hidden ,
cl::desc("Improve the code quality by splitting alternate instructions")  )
static

◆ UsesLimit

◆ VectorizeCopyableElements

cl::opt< bool > VectorizeCopyableElements("slp-copyable-elements", cl::init(true), cl::Hidden, cl::desc("Try to replace values with the idempotent instructions for " "better vectorization.")) ( "slp-copyable-elements" ,
cl::init(true) ,
cl::Hidden ,
cl::desc("Try to replace values with the idempotent instructions for " "better vectorization.")  )
static

Enables vectorization of copyable elements.

◆ VectorizeNonPowerOf2

cl::opt< bool > VectorizeNonPowerOf2("slp-vectorize-non-power-of-2", cl::init(false), cl::Hidden, cl::desc("Try to vectorize with non-power-of-2 number of elements.")) ( "slp-vectorize-non-power-of-2" ,
cl::init(false) ,
cl::Hidden ,
cl::desc("Try to vectorize with non-power-of-2 number of elements.")  )
static

◆ ViewSLPTree

cl::opt< bool > ViewSLPTree("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz")) ( "view-slp-tree" ,
cl::Hidden ,
cl::desc("Display the SLP trees with Graphviz")  )
static