LLVM 22.0.0git
|
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <tuple>
#include <utility>
Go to the source code of this file.
Classes | |
class | llvm::slpvectorizer::BoUpSLP |
Bottom Up SLP Vectorizer. More... | |
struct | llvm::slpvectorizer::BoUpSLP::EdgeInfo |
This structure holds any data we need about the edges being traversed during buildTreeRec(). More... | |
class | llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics |
A helper class used for scoring candidates for two consecutive lanes. More... | |
class | llvm::slpvectorizer::BoUpSLP::VLOperands |
A helper data structure to hold the operands of a vector of instructions. More... | |
struct | llvm::DenseMapInfo< BoUpSLP::EdgeInfo > |
struct | llvm::GraphTraits< BoUpSLP * > |
struct | llvm::GraphTraits< BoUpSLP * >::ChildIteratorType |
Add the VectorizableTree to the index iterator to be able to return TreeEntry pointers. More... | |
class | llvm::GraphTraits< BoUpSLP * >::nodes_iterator |
For the node iterator we just need to turn the TreeEntry iterator into a TreeEntry* iterator so that it dereferences to NodeRef. More... | |
struct | llvm::DOTGraphTraits< BoUpSLP * > |
class | llvm::slpvectorizer::BoUpSLP::ShuffleCostEstimator |
Merges shuffle masks and emits final shuffle instruction, if required. More... | |
class | llvm::slpvectorizer::BoUpSLP::ShuffleInstructionBuilder |
Merges shuffle masks and emits final shuffle instruction, if required. More... | |
Namespaces | |
namespace | llvm |
This is an optimization pass for GlobalISel generic memory operations. | |
namespace | llvm::slpvectorizer |
A private "module" namespace for types and utilities used by this pass. | |
Macros | |
#define | SV_NAME "slp-vectorizer" |
#define | DEBUG_TYPE "SLP" |
Functions | |
STATISTIC (NumVectorInstructions, "Number of vector instructions generated") | |
DEBUG_COUNTER (VectorizedGraphs, "slp-vectorized", "Controls which SLP graphs should be vectorized.") | |
static bool | isValidElementType (Type *Ty) |
Predicate for the element types that the SLP vectorizer supports. | |
static Type * | getValueType (Value *V) |
Returns the type of the given value/instruction V . | |
static unsigned | getNumElements (Type *Ty) |
static FixedVectorType * | getWidenedType (Type *ScalarTy, unsigned VF) |
static unsigned | getFullVectorNumberOfElements (const TargetTransformInfo &TTI, Type *Ty, unsigned Sz) |
Returns the number of elements of the given type Ty, not less than Sz, that forms a type which TTI splits into whole vector types during legalization. | |
static unsigned | getFloorFullVectorNumberOfElements (const TargetTransformInfo &TTI, Type *Ty, unsigned Sz) |
Returns the number of elements of the given type Ty, not greater than Sz, that forms a type which TTI splits into whole vector types during legalization. | |
static void | transformScalarShuffleIndiciesToVector (unsigned VecTyNumElements, SmallVectorImpl< int > &Mask) |
static unsigned | getShufflevectorNumGroups (ArrayRef< Value * > VL) |
static SmallVector< int > | calculateShufflevectorMask (ArrayRef< Value * > VL) |
static bool | isConstant (Value *V) |
static bool | isVectorLikeInstWithConstOps (Value *V) |
Checks if V is one of vector-like instructions, i.e. | |
static unsigned | getPartNumElems (unsigned Size, unsigned NumParts) |
Returns power-of-2 number of elements in a single register (part), given the total number of elements Size and number of registers (parts) NumParts . | |
static unsigned | getNumElems (unsigned Size, unsigned PartNumElems, unsigned Part) |
Returns correct remaining number of elements, considering total amount Size , (power-of-2 number) of elements in a single register PartNumElems and current register (part) Part . | |
static std::string | shortBundleName (ArrayRef< Value * > VL, int Idx=-1) |
Print a short descriptor of the instruction bundle suitable for debug output. | |
static bool | allSameBlock (ArrayRef< Value * > VL) |
static bool | allConstant (ArrayRef< Value * > VL) |
static bool | isSplat (ArrayRef< Value * > VL) |
static bool | isCommutative (Instruction *I, Value *ValWithUses) |
static bool | isCommutative (Instruction *I) |
This is a helper function to check whether I is commutative. | |
static unsigned | getNumberOfPotentiallyCommutativeOps (Instruction *I) |
template<typename T > | |
static std::optional< unsigned > | getInsertExtractIndex (const Value *Inst, unsigned Offset) |
static std::optional< unsigned > | getElementIndex (const Value *Inst, unsigned Offset=0) |
static bool | allSameOpcode (ArrayRef< Value * > VL) |
static SmallBitVector | buildUseMask (int VF, ArrayRef< int > Mask, UseMask MaskArg) |
Prepares a use bitset for the given mask either for the first argument or for the second. | |
template<bool IsPoisonOnly = false> | |
static SmallBitVector | isUndefVector (const Value *V, const SmallBitVector &UseMask={}) |
Checks if the given value is actually an undefined constant vector. | |
static std::optional< TargetTransformInfo::ShuffleKind > | isFixedVectorShuffle (ArrayRef< Value * > VL, SmallVectorImpl< int > &Mask, AssumptionCache *AC) |
Checks if the vector of instructions can be represented as a shuffle, like: x0 = extractelement <4 x i8> x, i32 0 x3 = extractelement <4 x i8> x, i32 3 y1 = extractelement <4 x i8> y, i32 1 y2 = extractelement <4 x i8> y, i32 2 x0x0 = mul i8 x0, x0 x3x3 = mul i8 x3, x3 y1y1 = mul i8 y1, y1 y2y2 = mul i8 y2, y2 ins1 = insertelement <4 x i8> poison, i8 x0x0, i32 0 ins2 = insertelement <4 x i8> ins1, i8 x3x3, i32 1 ins3 = insertelement <4 x i8> ins2, i8 y1y1, i32 2 ins4 = insertelement <4 x i8> ins3, i8 y2y2, i32 3 ret <4 x i8> ins4 can be transformed into: %1 = shufflevector <4 x i8> x, <4 x i8> y, <4 x i32> <i32 0, i32 3, i32 5, i32 6> %2 = mul <4 x i8> %1, %1 ret <4 x i8> %2 Mask will return the Shuffle Mask equivalent to the extracted elements. | |
static std::optional< unsigned > | getExtractIndex (const Instruction *E) |
static bool | llvm::areAllOperandsNonInsts (Value *V) |
Checks if the provided value does not require scheduling. | |
static bool | llvm::isUsedOutsideBlock (Value *V) |
Checks if the provided value does not require scheduling. | |
static bool | llvm::doesNotNeedToBeScheduled (Value *V) |
Checks if the specified value does not require scheduling. | |
static InstructionsState | getSameOpcode (ArrayRef< Value * > VL, const TargetLibraryInfo &TLI) |
static Instruction * | findInstructionWithOpcode (ArrayRef< Value * > VL, unsigned Opcode) |
Find an instruction with a specific opcode in VL. | |
static bool | areCompatibleCmpOps (Value *BaseOp0, Value *BaseOp1, Value *Op0, Value *Op1, const TargetLibraryInfo &TLI) |
Checks if the provided operands of 2 cmp instructions are compatible, i.e. | |
static bool | isCmpSameOrSwapped (const CmpInst *BaseCI, const CmpInst *CI, const TargetLibraryInfo &TLI) |
static bool | allSameType (ArrayRef< Value * > VL) |
static bool | doesInTreeUserNeedToExtract (Value *Scalar, Instruction *UserInst, TargetLibraryInfo *TLI, const TargetTransformInfo *TTI) |
static MemoryLocation | getLocation (Instruction *I) |
static bool | isSimple (Instruction *I) |
static void | addMask (SmallVectorImpl< int > &Mask, ArrayRef< int > SubMask, bool ExtendingManyInputs=false) |
Shuffles Mask in accordance with the given SubMask . | |
static void | fixupOrderingIndices (MutableArrayRef< unsigned > Order) |
Order may have elements assigned special value (size) which is out of bounds. | |
static SmallBitVector | getAltInstrMask (ArrayRef< Value * > VL, Type *ScalarTy, unsigned Opcode0, unsigned Opcode1) |
static SmallVector< Constant * > | replicateMask (ArrayRef< Constant * > Val, unsigned VF) |
Replicates the given Val VF times. | |
static void | llvm::inversePermutation (ArrayRef< unsigned > Indices, SmallVectorImpl< int > &Mask) |
static void | llvm::reorderScalars (SmallVectorImpl< Value * > &Scalars, ArrayRef< int > Mask) |
Reorders the list of scalars in accordance with the given Mask . | |
static bool | llvm::doesNotNeedToSchedule (ArrayRef< Value * > VL) |
Checks if the specified array of instructions does not require scheduling. | |
static bool | llvm::hasFullVectorsOrPowerOf2 (const TargetTransformInfo &TTI, Type *Ty, unsigned Sz) |
Returns true if widened type of Ty elements with size Sz represents full vector type, i.e. | |
static unsigned | llvm::getNumberOfParts (const TargetTransformInfo &TTI, VectorType *VecTy, const unsigned Limit=std::numeric_limits< unsigned >::max()) |
Returns number of parts, the type VecTy will be split at the codegen phase. | |
static void | reorderReuses (SmallVectorImpl< int > &Reuses, ArrayRef< int > Mask) |
Reorders the given Reuses mask according to the given Mask . | |
static void | reorderOrder (SmallVectorImpl< unsigned > &Order, ArrayRef< int > Mask, bool BottomOrder=false) |
Reorders the given Order according to the given Mask . | |
static bool | arePointersCompatible (Value *Ptr1, Value *Ptr2, const TargetLibraryInfo &TLI, bool CompareOpcodes=true) |
template<typename T > | |
static Align | computeCommonAlignment (ArrayRef< Value * > VL) |
Calculates minimal alignment as a common alignment. | |
static bool | isReverseOrder (ArrayRef< unsigned > Order) |
Check if Order represents reverse order. | |
static std::optional< Value * > | calculateRtStride (ArrayRef< Value * > PointerOps, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices, Instruction *Inst=nullptr) |
Checks if the provided list of pointers Pointers represents the strided pointers for type ElemTy. | |
static std::pair< InstructionCost, InstructionCost > | getGEPCosts (const TargetTransformInfo &TTI, ArrayRef< Value * > Ptrs, Value *BasePtr, unsigned Opcode, TTI::TargetCostKind CostKind, Type *ScalarTy, VectorType *VecTy) |
Calculate the scalar and the vector costs from vectorizing set of GEPs. | |
static InstructionCost | getShuffleCost (const TargetTransformInfo &TTI, TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask={}, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args={}) |
Returns the cost of the shuffle instructions with the given Kind , vector type Tp and optional Mask . | |
static InstructionCost | getScalarizationOverhead (const TargetTransformInfo &TTI, Type *ScalarTy, VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) |
This is similar to TargetTransformInfo::getScalarizationOverhead, but if ScalarTy is a FixedVectorType, a vector will be inserted or extracted instead of a scalar. | |
static InstructionCost | getVectorInstrCost (const TargetTransformInfo &TTI, Type *ScalarTy, unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Scalar, ArrayRef< std::tuple< Value *, User *, int > > ScalarUserAndIdx) |
This is similar to TargetTransformInfo::getVectorInstrCost, but if ScalarTy is a FixedVectorType, a vector will be extracted instead of a scalar. | |
static InstructionCost | getExtractWithExtendCost (const TargetTransformInfo &TTI, unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) |
This is similar to TargetTransformInfo::getExtractWithExtendCost, but if Dst is a FixedVectorType, a vector will be extracted instead of a scalar. | |
static Value * | createInsertVector (IRBuilderBase &Builder, Value *Vec, Value *V, unsigned Index, function_ref< Value *(Value *, Value *, ArrayRef< int >)> Generator={}) |
Creates subvector insert. | |
static Value * | createExtractVector (IRBuilderBase &Builder, Value *Vec, unsigned SubVecVF, unsigned Index) |
Generates subvector extract using Generator or using default shuffle. | |
static bool | buildCompressMask (ArrayRef< Value * > PointerOps, ArrayRef< unsigned > Order, Type *ScalarTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< int > &CompressMask) |
Builds compress-like mask for shuffles for the given PointerOps , ordered with Order . | |
static bool | isMaskedLoadCompress (ArrayRef< Value * > VL, ArrayRef< Value * > PointerOps, ArrayRef< unsigned > Order, const TargetTransformInfo &TTI, const DataLayout &DL, ScalarEvolution &SE, AssumptionCache &AC, const DominatorTree &DT, const TargetLibraryInfo &TLI, const function_ref< bool(Value *)> AreAllUsersVectorized, bool &IsMasked, unsigned &InterleaveFactor, SmallVectorImpl< int > &CompressMask, VectorType *&LoadVecTy) |
Checks if the VL can be transformed to a (masked)load + compress or (masked) interleaved load. | |
static bool | isMaskedLoadCompress (ArrayRef< Value * > VL, ArrayRef< Value * > PointerOps, ArrayRef< unsigned > Order, const TargetTransformInfo &TTI, const DataLayout &DL, ScalarEvolution &SE, AssumptionCache &AC, const DominatorTree &DT, const TargetLibraryInfo &TLI, const function_ref< bool(Value *)> AreAllUsersVectorized) |
Checks if the VL can be transformed to a (masked)load + compress or (masked) interleaved load. | |
static bool | isStridedLoad (ArrayRef< Value * > VL, ArrayRef< Value * > PointerOps, ArrayRef< unsigned > Order, const TargetTransformInfo &TTI, const DataLayout &DL, ScalarEvolution &SE, const bool IsAnyPointerUsedOutGraph, const int64_t Diff) |
Checks if strided loads can be generated out of VL loads with pointers PointerOps: | |
static bool | clusterSortPtrAccesses (ArrayRef< Value * > VL, ArrayRef< BasicBlock * > BBs, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices) |
static bool | areTwoInsertFromSameBuildVector (InsertElementInst *VU, InsertElementInst *V, function_ref< Value *(InsertElementInst *)> GetBaseOperand) |
Check if two insertelement instructions are from the same buildvector. | |
static bool | isAlternateInstruction (Instruction *I, Instruction *MainOp, Instruction *AltOp, const TargetLibraryInfo &TLI) |
Checks if the specified instruction I is an alternate operation for the given MainOp and AltOp instructions. | |
static bool | isRepeatedNonIdentityClusteredMask (ArrayRef< int > Mask, unsigned Sz) |
Checks if the given mask is a "clustered" mask with the same clusters of size Sz , which are not identity submasks. | |
static void | combineOrders (MutableArrayRef< unsigned > Order, ArrayRef< unsigned > SecondaryOrder) |
static LLVM_DUMP_METHOD void | dumpOrder (const BoUpSLP::OrdersType &Order) |
static void | gatherPossiblyVectorizableLoads (const BoUpSLP &R, ArrayRef< Value * > VL, const DataLayout &DL, ScalarEvolution &SE, const TargetTransformInfo &TTI, SmallVectorImpl< SmallVector< std::pair< LoadInst *, int64_t > > > &GatheredLoads, bool AddNew=true) |
Tries to find subvector of loads and builds new vector of only loads if can be profitable. | |
static std::pair< size_t, size_t > | generateKeySubkey (Value *V, const TargetLibraryInfo *TLI, function_ref< hash_code(size_t, LoadInst *)> LoadsSubkeyGenerator, bool AllowAlternate) |
Generates key/subkey pair for the given value to provide effective sorting of the values and better detection of the vectorizable values sequences. | |
static bool | isMainInstruction (Instruction *I, Instruction *MainOp, Instruction *AltOp, const TargetLibraryInfo &TLI) |
Checks if the specified instruction I is a main operation for the given MainOp and AltOp instructions. | |
static SmallVector< Type * > | buildIntrinsicArgTypes (const CallInst *CI, const Intrinsic::ID ID, const unsigned VF, unsigned MinBW, const TargetTransformInfo *TTI) |
Builds the arguments types vector for the given call instruction with the given ID for the specified vector factor. | |
static std::pair< InstructionCost, InstructionCost > | getVectorCallCosts (CallInst *CI, FixedVectorType *VecTy, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, ArrayRef< Type * > ArgTys) |
Calculates the costs of vectorized intrinsic (if possible) and vectorized function (if possible) calls. | |
static std::pair< Instruction *, Instruction * > | getMainAltOpsNoStateVL (ArrayRef< Value * > VL) |
Returns main/alternate instructions for the given VL . | |
static bool | tryToFindDuplicates (SmallVectorImpl< Value * > &VL, SmallVectorImpl< int > &ReuseShuffleIndices, const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI, const InstructionsState &S, const BoUpSLP::EdgeInfo &UserTreeIdx, bool TryPad=false) |
Checks that every instruction appears once in the list and if not, packs them, building ReuseShuffleIndices mask and mutating VL . | |
static InstructionCost | canConvertToFMA (ArrayRef< Value * > VL, const InstructionsState &S, DominatorTree &DT, const DataLayout &DL, TargetTransformInfo &TTI, const TargetLibraryInfo &TLI) |
Check if we can convert fadd/fsub sequence to FMAD. | |
static bool | isLoadCombineCandidateImpl (Value *Root, unsigned NumElts, TargetTransformInfo *TTI, bool MustMatchOrInst) |
static bool | isFirstInsertElement (const InsertElementInst *IE1, const InsertElementInst *IE2) |
Checks if the IE1 instruction is followed by the IE2 instruction in the buildvector sequence. | |
template<typename T > | |
static T * | performExtractsShuffleAction (MutableArrayRef< std::pair< T *, SmallVector< int > > > ShuffleMask, Value *Base, function_ref< unsigned(T *)> GetVF, function_ref< std::pair< T *, bool >(T *, ArrayRef< int >, bool)> ResizeAction, function_ref< T *(ArrayRef< int >, ArrayRef< T * >)> Action) |
Does the analysis of the provided shuffle masks and performs the requested actions on the vectors with the given shuffle masks. | |
static Instruction * | propagateMetadata (Instruction *Inst, ArrayRef< Value * > VL) |
static DebugLoc | getDebugLocFromPHI (PHINode &PN) |
static RecurKind | getRdxKind (Value *V) |
Gets recurrence kind from the specified value. | |
static bool | checkTreeSizes (ArrayRef< std::pair< unsigned, unsigned > > Sizes, bool First) |
Checks if the quadratic mean deviation is less than 90% of the mean size. | |
static std::optional< unsigned > | getAggregateSize (Instruction *InsertInst) |
static void | findBuildAggregateRec (Instruction *LastInsertInst, TargetTransformInfo *TTI, SmallVectorImpl< Value * > &BuildVectorOpds, SmallVectorImpl< Value * > &InsertElts, unsigned OperandOffset, const BoUpSLP &R) |
static bool | findBuildAggregate (Instruction *LastInsertInst, TargetTransformInfo *TTI, SmallVectorImpl< Value * > &BuildVectorOpds, SmallVectorImpl< Value * > &InsertElts, const BoUpSLP &R) |
Recognize construction of vectors like ra = insertelement <4 x float> poison, float s0, i32 0 rb = insertelement <4 x float> ra, float s1, i32 1 rc = insertelement <4 x float> rb, float s2, i32 2 rd = insertelement <4 x float> rc, float s3, i32 3 starting from the last insertelement or insertvalue instruction. | |
static Instruction * | getReductionInstr (const DominatorTree *DT, PHINode *P, BasicBlock *ParentBB, LoopInfo *LI) |
Try and get a reduction instruction from a phi node. | |
static bool | matchRdxBop (Instruction *I, Value *&V0, Value *&V1) |
static Instruction * | tryGetSecondaryReductionRoot (PHINode *Phi, Instruction *Root) |
We could have an initial reduction that is not an add. | |
static Instruction * | getNonPhiOperand (Instruction *I, PHINode *Phi) |
Returns the first operand of I that does not match Phi . | |
static bool | isReductionCandidate (Instruction *I) |
Returns true if I is a candidate instruction for reduction vectorization. | |
template<typename T > | |
static bool | tryToVectorizeSequence (SmallVectorImpl< T * > &Incoming, function_ref< bool(T *, T *)> Comparator, function_ref< bool(T *, T *)> AreCompatible, function_ref< bool(ArrayRef< T * >, bool)> TryToVectorizeHelper, bool MaxVFOnly, BoUpSLP &R) |
template<bool IsCompatibility> | |
static bool | compareCmp (Value *V, Value *V2, TargetLibraryInfo &TLI, const DominatorTree &DT) |
Compare two cmp instructions. | |
Variables | |
static cl::opt< bool > | RunSLPVectorization ("vectorize-slp", cl::init(true), cl::Hidden, cl::desc("Run the SLP vectorization passes")) |
static cl::opt< bool > | SLPReVec ("slp-revec", cl::init(false), cl::Hidden, cl::desc("Enable vectorization for wider vector utilization")) |
static cl::opt< int > | SLPCostThreshold ("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " "number ")) |
static cl::opt< bool > | SLPSkipEarlyProfitabilityCheck ("slp-skip-early-profitability-check", cl::init(false), cl::Hidden, cl::desc("When true, SLP vectorizer bypasses profitability checks based on " "heuristics and makes vectorization decision via cost modeling.")) |
static cl::opt< bool > | ShouldVectorizeHor ("slp-vectorize-hor", cl::init(true), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions")) |
static cl::opt< bool > | ShouldStartVectorizeHorAtStore ("slp-vectorize-hor-store", cl::init(false), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions feeding into a store")) |
static cl::opt< bool > | SplitAlternateInstructions ("slp-split-alternate-instructions", cl::init(true), cl::Hidden, cl::desc("Improve the code quality by splitting alternate instructions")) |
static cl::opt< int > | MaxVectorRegSizeOption ("slp-max-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits")) |
static cl::opt< unsigned > | MaxVFOption ("slp-max-vf", cl::init(0), cl::Hidden, cl::desc("Maximum SLP vectorization factor (0=unlimited)")) |
static cl::opt< int > | ScheduleRegionSizeBudget ("slp-schedule-budget", cl::init(100000), cl::Hidden, cl::desc("Limit the size of the SLP scheduling region per block")) |
Limits the size of scheduling regions in a block. | |
static cl::opt< int > | MinVectorRegSizeOption ("slp-min-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits")) |
static cl::opt< unsigned > | RecursionMaxDepth ("slp-recursion-max-depth", cl::init(12), cl::Hidden, cl::desc("Limit the recursion depth when building a vectorizable tree")) |
static cl::opt< unsigned > | MinTreeSize ("slp-min-tree-size", cl::init(3), cl::Hidden, cl::desc("Only vectorize small trees if they are fully vectorizable")) |
static cl::opt< int > | LookAheadMaxDepth ("slp-max-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for operand reordering scores")) |
static cl::opt< int > | RootLookAheadMaxDepth ("slp-max-root-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for searching best rooting option")) |
static cl::opt< unsigned > | MinProfitableStridedLoads ("slp-min-strided-loads", cl::init(2), cl::Hidden, cl::desc("The minimum number of loads, which should be considered strided, " "if the stride is > 1 or is runtime value")) |
static cl::opt< unsigned > | MaxProfitableLoadStride ("slp-max-stride", cl::init(8), cl::Hidden, cl::desc("The maximum stride, considered to be profitable.")) |
static cl::opt< bool > | ViewSLPTree ("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz")) |
static cl::opt< bool > | VectorizeNonPowerOf2 ("slp-vectorize-non-power-of-2", cl::init(false), cl::Hidden, cl::desc("Try to vectorize with non-power-of-2 number of elements.")) |
static cl::opt< bool > | VectorizeCopyableElements ("slp-copyable-elements", cl::init(true), cl::Hidden, cl::desc("Try to replace values with the idempotent instructions for " "better vectorization.")) |
Enables vectorization of copyable elements. | |
static const unsigned | AliasedCheckLimit = 10 |
static constexpr int | UsesLimit = 64 |
static const unsigned | MaxMemDepDistance = 160 |
static const int | MinScheduleRegionSize = 16 |
If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling regions to be handled. | |
static const unsigned | MaxPHINumOperands = 128 |
Maximum allowed number of operands in the PHI nodes. | |
#define DEBUG_TYPE "SLP" |
Definition at line 111 of file SLPVectorizer.cpp.
#define SV_NAME "slp-vectorizer" |
Definition at line 110 of file SLPVectorizer.cpp.
|
static |
Shuffles Mask in accordance with the given SubMask.
ExtendingManyInputs | Supports reshuffling of the mask with not only one but two input vectors. |
Definition at line 1717 of file SLPVectorizer.cpp.
References assert(), llvm::ArrayRef< T >::begin(), llvm::ArrayRef< T >::empty(), llvm::ArrayRef< T >::end(), I, llvm::PoisonMaskElem, and llvm::ArrayRef< T >::size().
Referenced by llvm::slpvectorizer::BoUpSLP::getReorderingData(), and llvm::slpvectorizer::BoUpSLP::reorderTopToBottom().
Returns true if all of the values in VL are constants (but not globals/constant expressions). Definition at line 496 of file SLPVectorizer.cpp.
References llvm::all_of(), and isConstant().
Referenced by llvm::slpvectorizer::BoUpSLP::getReorderingData(), llvm::slpvectorizer::BoUpSLP::getSpillCost(), llvm::slpvectorizer::BoUpSLP::isTreeNotExtendable(), and llvm::slpvectorizer::BoUpSLP::isTreeTinyAndNotFullyVectorizable().
Returns true if all of the values in VL are in the same block or false otherwise. Definition at line 472 of file SLPVectorizer.cpp.
References llvm::all_of(), llvm::ArrayRef< T >::end(), llvm::find_if(), llvm::ilist_detail::node_parent_access< NodeTy, ParentTy >::getParent(), II, and isVectorLikeInstWithConstOps().
Referenced by llvm::slpvectorizer::BoUpSLP::isTreeNotExtendable(), and llvm::slpvectorizer::BoUpSLP::isTreeTinyAndNotFullyVectorizable().
Returns true if all of the values in VL use the same opcode. For comparison instructions, also checks if predicates match. PoisonValues are considered matching. Interchangeable instructions are not considered. Definition at line 645 of file SLPVectorizer.cpp.
References llvm::CmpInst::BAD_ICMP_PREDICATE, llvm::ArrayRef< T >::end(), llvm::find_if(), and llvm::Instruction::getOpcode().
Referenced by getSameOpcode().
Returns true if all of the values in VL have the same type or false otherwise. Definition at line 1659 of file SLPVectorizer.cpp.
References llvm::all_of(), and llvm::ArrayRef< T >::consume_front().
Referenced by llvm::slpvectorizer::BoUpSLP::buildTree(), and llvm::slpvectorizer::BoUpSLP::getReorderingData().
|
static |
Checks if the provided operands of 2 cmp instructions are compatible, i.e. compatible instructions or constants, or just some other regular values.
Definition at line 1429 of file SLPVectorizer.cpp.
References getSameOpcode(), and isConstant().
Referenced by isCmpSameOrSwapped().
|
static |
Definition at line 6291 of file SLPVectorizer.cpp.
References getSameOpcode(), llvm::getUnderlyingObject(), isConstant(), and RecursionMaxDepth.
Referenced by llvm::slpvectorizer::BoUpSLP::canVectorizeLoads().
|
static |
Check if two insertelement instructions are from the same buildvector.
Definition at line 7288 of file SLPVectorizer.cpp.
References getElementIndex(), llvm::ilist_detail::node_parent_access< NodeTy, ParentTy >::getParent(), llvm::InsertElementInst::getType(), llvm::Value::hasOneUse(), llvm::SmallBitVector::set(), and llvm::SmallBitVector::test().
Referenced by llvm::slpvectorizer::BoUpSLP::getTreeCost(), and llvm::slpvectorizer::BoUpSLP::vectorizeTree().
|
static |
Builds compress-like mask for shuffles for the given PointerOps, ordered with Order.
Definition at line 6595 of file SLPVectorizer.cpp.
References llvm::SmallVectorImpl< T >::assign(), DL, llvm::ArrayRef< T >::empty(), llvm::ArrayRef< T >::front(), llvm::getPointersDiff(), I, llvm::PoisonMaskElem, Ptr, and llvm::ArrayRef< T >::size().
Referenced by isMaskedLoadCompress().
|
static |
Builds the argument types vector for the given call instruction with the given ID for the specified vector factor.
Definition at line 9737 of file SLPVectorizer.cpp.
References llvm::CallBase::args(), llvm::enumerate(), llvm::IntegerType::get(), llvm::Value::getContext(), getWidenedType(), Idx, llvm::isVectorIntrinsicWithScalarOpAtArg(), llvm::Intrinsic::not_intrinsic, and llvm::SmallVectorTemplateBase< T, bool >::push_back().
|
static |
Prepares a use bitset for the given mask either for the first argument or for the second.
Definition at line 682 of file SLPVectorizer.cpp.
References llvm::enumerate(), Idx, and llvm::PoisonMaskElem.
Referenced by performExtractsShuffleAction().
|
static |
Checks if the provided list of pointers Pointers represents the strided pointers for type ElemTy.
If they are not, std::nullopt is returned. Otherwise, if Inst is not specified, just an initialized optional value is returned to show that the pointers represent strided pointers. If Inst is specified, the runtime stride is materialized before the given Inst.
Definition at line 6335 of file SLPVectorizer.cpp.
References llvm::SmallVectorImpl< T >::clear(), DL, llvm::SCEVExpander::expandCodeFor(), llvm::ScalarEvolution::getAddExpr(), llvm::ScalarEvolution::getConstant(), llvm::ScalarEvolution::getMinusSCEV(), llvm::ScalarEvolution::getMulExpr(), llvm::ScalarEvolution::getSCEV(), llvm::SCEV::getType(), llvm::ScalarEvolution::getUDivExactExpr(), llvm::SCEV::isNonConstantNegative(), llvm::SCEV::isZero(), Ptr, llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::SmallVectorImpl< T >::resize(), llvm::ArrayRef< T >::size(), llvm::SmallVectorBase< Size_T >::size(), and Size.
Referenced by llvm::slpvectorizer::BoUpSLP::canVectorizeLoads().
|
static |
Definition at line 402 of file SLPVectorizer.cpp.
References assert(), llvm::ArrayRef< T >::front(), getShufflevectorNumGroups(), and llvm::PoisonMaskElem.
|
static |
Check if we can convert fadd/fsub sequence to FMAD.
Definition at line 12637 of file SLPVectorizer.cpp.
References llvm::all_of(), llvm::FastMathFlags::allowContract(), Analysis, assert(), CostKind, DL, llvm::ArrayRef< T >::front(), llvm::TargetTransformInfo::getInstructionCost(), llvm::TargetTransformInfo::getIntrinsicInstrCost(), llvm::InstructionCost::getInvalid(), getSameOpcode(), I, Operands, llvm::FastMathFlags::set(), llvm::TargetTransformInfo::TCK_RecipThroughput, and llvm::zip().
Checks if the quadratic mean deviation is less than 90% of the mean size.
Definition at line 22647 of file SLPVectorizer.cpp.
|
static |
Definition at line 7158 of file SLPVectorizer.cpp.
References llvm::all_of(), llvm::any_of(), assert(), llvm::sampleprof::Base, llvm::SmallVectorImpl< T >::clear(), llvm::SmallPtrSetImpl< PtrType >::contains(), llvm::Depth, llvm::ArrayRef< T >::drop_front(), llvm::enumerate(), llvm::MapVector< KeyT, ValueT, MapType, VectorType >::find(), llvm::MapVector< KeyT, ValueT, MapType, VectorType >::front(), llvm::ArrayRef< T >::front(), llvm::getUnderlyingObject(), llvm::SmallPtrSetImpl< PtrType >::insert(), llvm::isPointerTy(), P, Ptr, llvm::SmallVectorTemplateBase< T, bool >::push_back(), RecursionMaxDepth, llvm::ArrayRef< T >::size(), llvm::MapVector< KeyT, ValueT, MapType, VectorType >::size(), llvm::SmallVectorBase< Size_T >::size(), llvm::stable_sort(), and llvm::MapVector< KeyT, ValueT, MapType, VectorType >::try_emplace().
Referenced by llvm::slpvectorizer::BoUpSLP::findPartiallyOrderedLoads().
|
static |
Definition at line 7745 of file SLPVectorizer.cpp.
References assert(), llvm::ArrayRef< T >::empty(), Idx, llvm::SmallBitVector::set(), llvm::ArrayRef< T >::size(), and llvm::SmallBitVector::test().
Referenced by llvm::slpvectorizer::BoUpSLP::reorderBottomToTop(), and llvm::slpvectorizer::BoUpSLP::reorderTopToBottom().
|
static |
Compare two cmp instructions.
If IsCompatibility is true, the function returns true if the 2 cmps have same/swapped predicates and most compatible corresponding operands. If IsCompatibility is false, the function implements a strict weak ordering relation between two cmp instructions, returning true if the first instruction is "less" than the second, i.e. its predicate is less than the predicate of the second or the operands IDs are less than the operands IDs of the second cmp instruction.
Definition at line 25657 of file SLPVectorizer.cpp.
References assert(), llvm::DomTreeNodeBase< NodeT >::getDFSNumIn(), llvm::DominatorTreeBase< NodeT, IsPostDom >::getNode(), llvm::User::getOperand(), getSameOpcode(), llvm::Type::getScalarSizeInBits(), llvm::CmpInst::getSwappedPredicate(), llvm::Value::getType(), llvm::Type::getTypeID(), llvm::Value::getValueID(), I, and isValidElementType().
Calculates minimal alignment as a common alignment.
Definition at line 6310 of file SLPVectorizer.cpp.
References llvm::ArrayRef< T >::consume_front(), and llvm::getAlign().
|
static |
Generates subvector extract using Generator
or using default shuffle.
Definition at line 6585 of file SLPVectorizer.cpp.
References llvm::IRBuilderBase::CreateShuffleVector(), Index, and llvm::PoisonMaskElem.
Referenced by llvm::slpvectorizer::BoUpSLP::vectorizeTree().
|
static |
Creates subvector insert.
Generates shuffle using Generator
or using default shuffle.
Definition at line 6555 of file SLPVectorizer.cpp.
DEBUG_COUNTER | ( | VectorizedGraphs | , |
"slp-vectorized" | , | ||
"Controls which SLP graphs should be vectorized." | |||
) |
|
static |
Definition at line 1666 of file SLPVectorizer.cpp.
References llvm::any_of(), llvm::CallBase::args(), llvm::enumerate(), llvm::Instruction::getOpcode(), llvm::LoadInst::getPointerOperand(), and llvm::getVectorIntrinsicIDForCall().
Referenced by llvm::slpvectorizer::BoUpSLP::buildExternalUses().
|
static |
Definition at line 8862 of file SLPVectorizer.cpp.
References llvm::dbgs(), and Idx.
|
static |
Recognize construction of vectors like: ra = insertelement <4 x float> poison, float s0, i32 0; rb = insertelement <4 x float> ra, float s1, i32 1; rc = insertelement <4 x float> rb, float s2, i32 2; rd = insertelement <4 x float> rc, float s3, i32 3; starting from the last insertelement or insertvalue instruction.
Also recognize homogeneous aggregates like {<2 x float>, <2 x float>}, {{float, float}, {float, float}}, [2 x {float, float}] and so on. See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples.
Assume LastInsertInst is of InsertElementInst or InsertValueInst type.
Definition at line 25130 of file SLPVectorizer.cpp.
References assert(), llvm::SmallVectorBase< Size_T >::empty(), llvm::erase(), findBuildAggregateRec(), getAggregateSize(), llvm::SmallVectorImpl< T >::resize(), and llvm::SmallVectorBase< Size_T >::size().
|
static |
Definition at line 25091 of file SLPVectorizer.cpp.
References findBuildAggregateRec(), getElementIndex(), llvm::User::getOperand(), and llvm::Value::hasOneUse().
Referenced by findBuildAggregate(), and findBuildAggregateRec().
|
static |
Find an instruction with a specific opcode in VL.
VL | Array of values to search through. Must contain only Instructions and PoisonValues. |
Opcode | The instruction opcode to search for |
Definition at line 1414 of file SLPVectorizer.cpp.
References assert().
Referenced by getSameOpcode().
|
static |
Order may have elements assigned special value (size) which is out of bounds.
Such indices only appear in places which correspond to undef values (see canReuseExtract for details) and are used to prevent undef values from affecting the ordering of operands. The first loop below simply finds all unused indices and then the next loop nest assigns these indices to the undef value positions. In the example below, Order has two undef positions, which are assigned values 3 and 7 respectively: before: 6 9 5 4 9 2 1 0; after: 6 3 5 4 7 2 1 0.
Definition at line 1752 of file SLPVectorizer.cpp.
References assert(), llvm::SmallBitVector::count(), llvm::SmallBitVector::find_first(), llvm::SmallBitVector::find_next(), I, Idx, llvm::SmallBitVector::none(), llvm::SmallBitVector::reset(), llvm::SmallBitVector::set(), and llvm::ArrayRef< T >::size().
Referenced by llvm::slpvectorizer::BoUpSLP::getReorderingData(), llvm::slpvectorizer::BoUpSLP::reorderBottomToTop(), reorderOrder(), and llvm::slpvectorizer::BoUpSLP::reorderTopToBottom().
|
static |
Tries to find subvector of loads and builds new vector of only loads if can be profitable.
Definition at line 8917 of file SLPVectorizer.cpp.
References llvm::any_of(), assert(), llvm::ArrayRef< T >::begin(), llvm::bit_ceil(), llvm::SetVector< T, Vector, Set, N >::contains(), llvm::SmallSet< T, N, C >::contains(), llvm::SmallPtrSetImpl< PtrType >::contains(), llvm::Data, DL, llvm::SmallVectorImpl< T >::emplace_back(), llvm::ArrayRef< T >::empty(), llvm::SetVector< T, Vector, Set, N >::empty(), llvm::ArrayRef< T >::end(), llvm::enumerate(), llvm::find_if(), llvm::ArrayRef< T >::front(), llvm::ilist_detail::node_parent_access< NodeTy, ParentTy >::getParent(), llvm::LoadInst::getPointerOperand(), llvm::getPointersDiff(), llvm::Value::getType(), llvm::getUnderlyingObject(), getValueType(), llvm::has_single_bit(), Idx, llvm::SmallSet< T, N, C >::insert(), llvm::SmallPtrSetImpl< PtrType >::insert(), llvm::SetVector< T, Vector, Set, N >::insert_range(), isValidElementType(), llvm::Offset, P, RecursionMaxDepth, llvm::ArrayRef< T >::size(), and llvm::zip().
|
static |
Generates key/subkey pair for the given value to provide effective sorting of the values and better detection of the vectorizable values sequences.
The keys/subkeys can be used for better sorting of the values themselves (keys) and in values subgroups (subkeys).
Definition at line 9532 of file SLPVectorizer.cpp.
References llvm::SmallBitVector::all(), generateKeySubkey(), llvm::CmpInst::getInversePredicate(), llvm::VFDatabase::getMappings(), llvm::User::getOperand(), llvm::CmpInst::getSwappedPredicate(), llvm::Value::getType(), llvm::getVectorIntrinsicIDForCall(), llvm::hash_combine(), llvm::hash_value(), I, llvm::Instruction::isIntDivRem(), llvm::isTriviallyVectorizable(), isUndefVector(), and isVectorLikeInstWithConstOps().
Referenced by generateKeySubkey().
|
static |
Definition at line 25063 of file SLPVectorizer.cpp.
References llvm::Type::isSingleValueType(), and IV.
Referenced by findBuildAggregate().
|
static |
Definition at line 1778 of file SLPVectorizer.cpp.
References getNumElements(), getOpcode(), llvm::SmallBitVector::set(), and llvm::ArrayRef< T >::size().
Referenced by llvm::slpvectorizer::BoUpSLP::reorderTopToBottom().
Definition at line 18825 of file SLPVectorizer.cpp.
References DL, llvm::Instruction::getDebugLoc(), and llvm::DebugLoc::getUnknown().
Definition at line 612 of file SLPVectorizer.cpp.
References I, IV, and llvm::Offset.
|
static |
Definition at line 860 of file SLPVectorizer.cpp.
References assert(), llvm::Instruction::getOpcode(), and llvm::User::getOperand().
Referenced by llvm::slpvectorizer::BoUpSLP::getReorderingData().
|
static |
This is similar to TargetTransformInfo::getExtractWithExtendCost, but if Dst is a FixedVectorType, a vector will be extracted instead of a scalar.
Definition at line 6537 of file SLPVectorizer.cpp.
References assert(), CostKind, llvm::TargetTransformInfo::getCastInstrCost(), llvm::TargetTransformInfo::getExtractWithExtendCost(), getShuffleCost(), getWidenedType(), Index, llvm::TargetTransformInfo::None, llvm::TargetTransformInfo::SK_ExtractSubvector, and SLPReVec.
Referenced by llvm::slpvectorizer::BoUpSLP::getTreeCost().
|
static |
Returns the number of elements of the given type Ty
, not greater than Sz
, which forms type, which splits by TTI
into whole vector types during legalization.
Definition at line 297 of file SLPVectorizer.cpp.
References llvm::bit_ceil(), llvm::bit_floor(), llvm::divideCeil(), llvm::TargetTransformInfo::getNumberOfParts(), getWidenedType(), and isValidElementType().
Referenced by llvm::slpvectorizer::BoUpSLP::canVectorizeLoads().
|
static |
Returns the number of elements of the given type Ty
, not less than Sz
, which forms type, which splits by TTI
into whole vector types during legalization.
Definition at line 282 of file SLPVectorizer.cpp.
References llvm::bit_ceil(), llvm::divideCeil(), llvm::TargetTransformInfo::getNumberOfParts(), getWidenedType(), and isValidElementType().
Referenced by isMaskedLoadCompress(), and tryToFindDuplicates().
|
static |
Calculate the scalar and the vector costs from vectorizing set of GEPs.
Definition at line 12393 of file SLPVectorizer.cpp.
References llvm::all_of(), CostKind, llvm::ArrayRef< T >::end(), llvm::find_if(), llvm::TargetTransformInfo::getGEPCost(), llvm::TargetTransformInfo::getPointersChainCost(), Ptr, llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::SmallVectorBase< Size_T >::size(), and llvm::TargetTransformInfo::TCC_Free.
Referenced by llvm::slpvectorizer::BoUpSLP::canVectorizeLoads(), and isMaskedLoadCompress().
|
static |
Definition at line 587 of file SLPVectorizer.cpp.
References llvm::Offset.
|
static |
Definition at line 1695 of file SLPVectorizer.cpp.
References llvm::MemoryLocation::get(), and I.
|
static |
Returns main/alternate instructions for the given VL
.
Unlike getSameOpcode supports non-compatible instructions for better SplitVectorize node support.
Definition at line 10300 of file SLPVectorizer.cpp.
References assert(), llvm::Instruction::getOpcode(), llvm::ilist_detail::node_parent_access< NodeTy, ParentTy >::getParent(), and I.
|
static |
Returns
the first operand of I
that does not match Phi
.
If operand is not an instruction it returns nullptr.
Definition at line 25253 of file SLPVectorizer.cpp.
References I, and matchRdxBop().
|
static |
I
, considering commutativity. Returns 2 for commutative intrinsics. I | The instruction to check for commutativity |
Definition at line 576 of file SLPVectorizer.cpp.
References I, and isCommutative().
Definition at line 265 of file SLPVectorizer.cpp.
References assert().
Referenced by llvm::MachineIRBuilder::buildSplatBuildVector(), cacheDIVar(), llvm::slpvectorizer::BoUpSLP::computeMinimumValueSizes(), containsUndefinedElement(), CreateGCRelocates(), expandAbs(), expandCrossIntrinsic(), expandExpIntrinsic(), expandLogIntrinsic(), extractVector(), getAltInstrMask(), llvm::AArch64TTIImpl::getCastInstrCost(), llvm::BasicTTIImplBase< T >::getIndexedVectorInstrCostFromEnd(), llvm::getNumberOfParts(), llvm::X86TTIImpl::getScalarizationOverhead(), llvm::BasicTTIImplBase< T >::getTypeBasedIntrinsicInstrCost(), llvm::SystemZTTIImpl::getVectorTruncCost(), getWidenedType(), insertVector(), llvm::X86TTIImpl::isLegalMaskedExpandLoad(), llvm::X86TTIImpl::isLegalMaskedLoad(), llvm::X86TTIImpl::isLegalMaskedStore(), isValidIndirectionTable(), llvm::ShuffleVectorInst::isValidOperands(), isVectorPromotionViableForSlice(), isZero(), promoteAllocaUserToVector(), llvm::RewriteStatepointsForGC::runOnFunction(), llvm::InstCombinerImpl::SimplifyDemandedVectorElts(), simplifyX86pack(), llvm::InstCombinerImpl::unshuffleConstant(), and upgradeX86IntrinsicCall().
Returns correct remaining number of elements, considering total amount Size
, (power-of-2 number) of elements in a single register PartNumElems
and current register (part) Part
.
Definition at line 453 of file SLPVectorizer.cpp.
References Size.
Referenced by llvm::slpvectorizer::BoUpSLP::findReusedOrderedScalars().
Returns power-of-2 number of elements in a single register (part), given the total number of elements Size
and number of registers (parts) NumParts
.
Definition at line 446 of file SLPVectorizer.cpp.
References llvm::bit_ceil(), llvm::divideCeil(), and Size.
Referenced by llvm::slpvectorizer::BoUpSLP::ShuffleCostEstimator::add(), and llvm::slpvectorizer::BoUpSLP::findReusedOrderedScalars().
Gets recurrence kind from the specified value.
Definition at line 25060 of file SLPVectorizer.cpp.
|
static |
Try and get a reduction instruction from a phi node.
Given a phi node P
in a block ParentBB
, consider possible reductions if they come from either ParentBB
or a containing loop latch.
Definition at line 25165 of file SLPVectorizer.cpp.
References llvm::DominatorTree::dominates(), llvm::LoopInfoBase< BlockT, LoopT >::getLoopFor(), llvm::LoopBase< BlockT, LoopT >::getLoopLatch(), and P.
|
static |
VL
described in InstructionsState, the Opcode that we suppose the whole list could be vectorized even if its structure is diverse. Definition at line 1465 of file SLPVectorizer.cpp.
References llvm::all_of(), allSameOpcode(), assert(), llvm::CmpInst::BAD_ICMP_PREDICATE, llvm::SetVector< T, Vector, Set, N >::contains(), llvm::SmallVectorBase< Size_T >::empty(), llvm::ArrayRef< T >::end(), llvm::find_if(), findInstructionWithOpcode(), llvm::SmallVectorTemplateCommon< T, typename >::front(), llvm::CallBase::getBundleOperandsStartIndex(), llvm::CallBase::getCalledFunction(), llvm::VFDatabase::getMappings(), llvm::Instruction::getOpcode(), llvm::User::getOperand(), llvm::CmpInst::getSwappedPredicate(), llvm::Value::getType(), llvm::getVectorIntrinsicIDForCall(), llvm::CallBase::hasOperandBundles(), I, llvm::SetVector< T, Vector, Set, N >::insert(), isCmpSameOrSwapped(), llvm::isTriviallyVectorizable(), isVectorLikeInstWithConstOps(), llvm::User::op_begin(), llvm::ArrayRef< T >::size(), llvm::SetVector< T, Vector, Set, N >::size(), and llvm::SmallVectorBase< Size_T >::size().
Referenced by areCompatibleCmpOps(), arePointersCompatible(), canConvertToFMA(), compareCmp(), llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics::getShallowScore(), llvm::slpvectorizer::BoUpSLP::isTreeNotExtendable(), llvm::slpvectorizer::BoUpSLP::VLOperands::reorder(), and tryToFindDuplicates().
|
static |
This is similar to TargetTransformInfo::getScalarizationOverhead, but if ScalarTy is a FixedVectorType, a vector will be inserted or extracted instead of a scalar.
Definition at line 6485 of file SLPVectorizer.cpp.
Referenced by llvm::slpvectorizer::BoUpSLP::canVectorizeLoads(), llvm::LoopVectorizationCostModel::getDivRemSpeculationCost(), isMaskedLoadCompress(), and llvm::LoopVectorizationCostModel::setVectorizedCallDecision().
|
static |
Returns the cost of the shuffle instructions with the given Kind
, vector type Tp
and optional Mask
.
Adds SLP-specific cost estimation for insert subvector pattern.
Definition at line 6456 of file SLPVectorizer.cpp.
Referenced by llvm::slpvectorizer::BoUpSLP::canVectorizeLoads(), getExtractWithExtendCost(), llvm::slpvectorizer::BoUpSLP::getReorderingData(), llvm::slpvectorizer::BoUpSLP::getTreeCost(), getVectorInstrCost(), and isMaskedLoadCompress().
Definition at line 350 of file SLPVectorizer.cpp.
References llvm::SmallBitVector::all(), llvm::all_of(), assert(), llvm::ArrayRef< T >::empty(), llvm::ArrayRef< T >::front(), I, llvm::SmallBitVector::set(), llvm::ArrayRef< T >::size(), and llvm::ArrayRef< T >::slice().
Referenced by calculateShufflevectorMask().
Returns the type of the given value/instruction V
.
If it is a store, returns the type of its value operand, for Cmp - the types of the compare operands and for insertelement - the type of the inserted operand. Otherwise, just the type of the value is returned.
Definition at line 254 of file SLPVectorizer.cpp.
Referenced by CollectOpsToWiden(), combineArithReduction(), combineConcatVectorOps(), combineMulToPMULDQ(), combineTargetShuffle(), combineToHorizontalAddSub(), CompactSwizzlableVector(), detectPMADDUBSW(), FoldBUILD_VECTOR(), foldCONCAT_VECTORS(), gatherPossiblyVectorizableLoads(), llvm::TargetLoweringBase::getAsmOperandValueType(), llvm::Function::getFunctionType(), llvm::RISCVTTIImpl::getIntImmCostInst(), llvm::TargetLoweringBase::getMemValueType(), llvm::TargetLoweringBase::getSimpleValueType(), getTestBitOperand(), llvm::TargetLoweringBase::isExtLoad(), LLVMGlobalGetValueType(), matchPMADDWD(), matchPMADDWD_2(), narrowVectorSelect(), performCONCAT_VECTORSCombine(), llvm::SelectionDAGISel::SelectInlineAsmMemoryOperands(), and tryToFindDuplicates().
|
static |
Calculates the costs of vectorized intrinsic (if possible) and vectorized function (if possible) calls.
Returns invalid cost for the corresponding calls, if they cannot be vectorized/will be scalarized.
Definition at line 9762 of file SLPVectorizer.cpp.
References llvm::VFShape::get(), llvm::TargetTransformInfo::getCallInstrCost(), llvm::ElementCount::getFixed(), llvm::CallBase::getFunctionType(), llvm::TargetTransformInfo::getIntrinsicInstrCost(), llvm::InstructionCost::getInvalid(), llvm::FixedVectorType::getNumElements(), llvm::getVectorIntrinsicIDForCall(), llvm::VFDatabase::getVectorizedFunction(), IntrinsicCost, llvm::CallBase::isNoBuiltin(), and llvm::TargetTransformInfo::TCK_RecipThroughput.
|
static |
This is similar to TargetTransformInfo::getVectorInstrCost, but if ScalarTy is a FixedVectorType, a vector will be extracted instead of a scalar.
Definition at line 6518 of file SLPVectorizer.cpp.
References assert(), CostKind, getShuffleCost(), llvm::TargetTransformInfo::getVectorInstrCost(), Index, llvm::TargetTransformInfo::SK_ExtractSubvector, and SLPReVec.
Referenced by llvm::slpvectorizer::BoUpSLP::getTreeCost().
|
static |
Definition at line 274 of file SLPVectorizer.cpp.
References llvm::FixedVectorType::get(), getNumElements(), and llvm::Type::getScalarType().
Referenced by llvm::slpvectorizer::BoUpSLP::ShuffleCostEstimator::add(), buildIntrinsicArgTypes(), llvm::slpvectorizer::BoUpSLP::canMapToVector(), llvm::slpvectorizer::BoUpSLP::canVectorizeLoads(), llvm::slpvectorizer::BoUpSLP::computeMinimumValueSizes(), llvm::slpvectorizer::BoUpSLP::findReusedOrderedScalars(), getExtractWithExtendCost(), getFloorFullVectorNumberOfElements(), getFullVectorNumberOfElements(), llvm::slpvectorizer::BoUpSLP::getReductionType(), llvm::slpvectorizer::BoUpSLP::getReorderingData(), llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics::getShallowScore(), llvm::slpvectorizer::BoUpSLP::getSpillCost(), llvm::slpvectorizer::BoUpSLP::getTreeCost(), llvm::hasFullVectorsOrPowerOf2(), isMaskedLoadCompress(), isStridedLoad(), llvm::slpvectorizer::BoUpSLP::isTreeTinyAndNotFullyVectorizable(), llvm::slpvectorizer::BoUpSLP::ShuffleInstructionBuilder::needToDelay(), llvm::slpvectorizer::BoUpSLP::optimizeGatherSequence(), llvm::slpvectorizer::BoUpSLP::reorderTopToBottom(), and llvm::slpvectorizer::BoUpSLP::vectorizeTree().
|
static |
Checks if the specified instruction I
is an alternate operation for the given MainOp
and AltOp
instructions.
Definition at line 11970 of file SLPVectorizer.cpp.
References assert(), llvm::CmpInst::getSwappedPredicate(), I, isCmpSameOrSwapped(), and P.
Referenced by llvm::slpvectorizer::BoUpSLP::getReorderingData().
|
static |
CI
has similar "look" and same predicate as BaseCI
, "as is" or with its operands and predicate swapped, false otherwise. Definition at line 1443 of file SLPVectorizer.cpp.
References areCompatibleCmpOps(), assert(), llvm::User::getOperand(), llvm::CmpInst::getPredicate(), llvm::CmpInst::getSwappedPredicate(), and llvm::Value::getType().
Referenced by getSameOpcode(), and isAlternateInstruction().
|
static |
This is a helper function to check whether I
is commutative.
This is a convenience wrapper that calls the two-parameter version of isCommutative with the same instruction for both parameters. This is the common case where the instruction being checked for commutativity is the same as the instruction whose uses are analyzed for special patterns (see the two-parameter version above for details).
I | The instruction to check for commutativity |
Definition at line 571 of file SLPVectorizer.cpp.
References I, and isCommutative().
|
static |
I
is commutative, handles CmpInst and BinaryOperator. For BinaryOperator, it also checks if InstWithUses
is used in specific patterns that make it effectively commutative (like equality comparisons with zero). In most cases, users should not call this function directly (since I
and InstWithUses
are the same). However, when analyzing interchangeable instructions, we need to use the converted opcode along with the original uses. I | The instruction to check for commutativity |
ValWithUses | The value whose uses are analyzed for special patterns |
Definition at line 530 of file SLPVectorizer.cpp.
References llvm::all_of(), llvm::Value::hasNUsesOrMore(), I, llvm::Value::uses(), and UsesLimit.
Referenced by getNumberOfPotentiallyCommutativeOps(), llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics::getScoreAtLevelRec(), isCommutative(), llvm::slpvectorizer::BoUpSLP::isProfitableToReorder(), and llvm::FastISel::selectBinaryOp().
Definition at line 421 of file SLPVectorizer.cpp.
Referenced by allConstant(), areCompatibleCmpOps(), and isVectorLikeInstWithConstOps().
|
static |
Checks if the IE1
instruction is followed by the IE2
instruction in the buildvector sequence.
Definition at line 15678 of file SLPVectorizer.cpp.
References getElementIndex(), llvm::Value::hasOneUse(), and llvm_unreachable.
Referenced by llvm::slpvectorizer::BoUpSLP::getTreeCost().
|
static |
Checks if the vector of instructions can be represented as a shuffle, like: x0 = extractelement <4 x i8> x, i32 0 x3 = extractelement <4 x i8> x, i32 3 y1 = extractelement <4 x i8> y, i32 1 y2 = extractelement <4 x i8> y, i32 2 x0x0 = mul i8 x0, x0 x3x3 = mul i8 x3, x3 y1y1 = mul i8 y1, y1 y2y2 = mul i8 y2, y2 ins1 = insertelement <4 x i8> poison, i8 x0x0, i32 0 ins2 = insertelement <4 x i8> ins1, i8 x3x3, i32 1 ins3 = insertelement <4 x i8> ins2, i8 y1y1, i32 2 ins4 = insertelement <4 x i8> ins3, i8 y2y2, i32 3 ret <4 x i8> ins4 can be transformed into: %1 = shufflevector <4 x i8> x, <4 x i8> y, <4 x i32> <i32 0, i32 3, i32 5, i32 6> %2 = mul <4 x i8> %1, %1 ret <4 x i8> %2 Mask will return the Shuffle Mask equivalent to the extracted elements.
TODO: Can we split off and reuse the shuffle mask detection from ShuffleVectorInst/getShuffleCost?
Definition at line 772 of file SLPVectorizer.cpp.
References llvm::any_of(), llvm::ArrayRef< T >::begin(), llvm::ArrayRef< T >::end(), llvm::find_if(), I, Idx, llvm::isGuaranteedNotToBePoison(), isUndefVector(), llvm::PoisonMaskElem, Select, llvm::ArrayRef< T >::size(), Size, llvm::TargetTransformInfo::SK_PermuteSingleSrc, llvm::TargetTransformInfo::SK_PermuteTwoSrc, llvm::TargetTransformInfo::SK_Select, and Unknown.
|
static |
Definition at line 15173 of file SLPVectorizer.cpp.
References llvm::dbgs(), llvm::IntegerType::get(), llvm::Value::getContext(), llvm::Type::getIntegerBitWidth(), llvm::TargetTransformInfo::isTypeLegal(), LLVM_DEBUG, llvm::PatternMatch::m_APInt(), llvm::PatternMatch::m_Or(), llvm::PatternMatch::m_Shl(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::m_ZExt(), llvm::PatternMatch::match(), and llvm::APInt::urem().
Referenced by llvm::slpvectorizer::BoUpSLP::isLoadCombineCandidate(), and llvm::slpvectorizer::BoUpSLP::isLoadCombineReductionCandidate().
|
static |
Checks if the specified instruction I
is a main operation for the given MainOp
and AltOp
instructions.
Definition at line 11964 of file SLPVectorizer.cpp.
References I.
|
static |
Checks if the VL
can be transformed to a (masked)load + compress or (masked) interleaved load.
Definition at line 6759 of file SLPVectorizer.cpp.
References DL, and isMaskedLoadCompress().
|
static |
Checks if the VL
can be transformed to a (masked)load + compress or (masked) interleaved load.
Definition at line 6628 of file SLPVectorizer.cpp.
References assert(), llvm::ArrayRef< T >::back(), llvm::ArrayRef< T >::begin(), buildCompressMask(), llvm::CallingConv::C, CostKind, DL, llvm::ArrayRef< T >::empty(), llvm::ArrayRef< T >::end(), llvm::enumerate(), llvm::SmallVectorTemplateCommon< T, typename >::front(), llvm::ArrayRef< T >::front(), llvm::APInt::getAllOnes(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::getFixedValue(), getFullVectorNumberOfElements(), getGEPCosts(), llvm::TargetTransformInfo::getInstructionCost(), llvm::TargetTransformInfo::getInterleavedMemoryOpCost(), llvm::TargetTransformInfo::getMaskedMemoryOpCost(), llvm::TargetTransformInfo::getMemoryOpCost(), llvm::getPointersDiff(), llvm::TargetTransformInfo::getRegisterBitWidth(), getScalarizationOverhead(), getShuffleCost(), llvm::TargetTransformInfo::getVectorInstrCost(), getWidenedType(), I, llvm::inversePermutation(), llvm::TargetTransformInfo::isLegalInterleavedAccessType(), llvm::TargetTransformInfo::isLegalMaskedLoad(), llvm::isSafeToLoadUnconditionally(), llvm::PoisonMaskElem, llvm::reorderScalars(), llvm::TargetTransformInfo::RGK_FixedWidthVector, llvm::ArrayRef< T >::size(), llvm::SmallVectorBase< Size_T >::size(), llvm::TargetTransformInfo::SK_PermuteSingleSrc, llvm::SmallVectorImpl< T >::swap(), and llvm::TargetTransformInfo::TCK_RecipThroughput.
Referenced by llvm::slpvectorizer::BoUpSLP::canVectorizeLoads(), and isMaskedLoadCompress().
|
static |
Returns true if I
is a candidate instruction for reduction vectorization.
Definition at line 25262 of file SLPVectorizer.cpp.
References I, IsSelect(), llvm::PatternMatch::m_Select(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::match(), and matchRdxBop().
Checks if the given mask is a "clustered" mask with the same clusters of size Sz
, which are not identity submasks.
Definition at line 7705 of file SLPVectorizer.cpp.
References E, I, and llvm::ShuffleVectorInst::isIdentityMask().
Check if Order
represents reverse order.
Definition at line 6318 of file SLPVectorizer.cpp.
References llvm::all_of(), assert(), llvm::ArrayRef< T >::empty(), llvm::enumerate(), and llvm::ArrayRef< T >::size().
Referenced by llvm::slpvectorizer::BoUpSLP::canVectorizeLoads(), and llvm::slpvectorizer::BoUpSLP::getReorderingData().
|
static |
Definition at line 1704 of file SLPVectorizer.cpp.
Referenced by combineBROADCAST_LOAD(), llvm::DOTGraphTraits< DOTFuncInfo * >::getNodeLabel(), llvm::DOTGraphTraits< MachineBlockFrequencyInfo * >::getNodeLabel(), llvm::DOTGraphTraits< const MachineFunction * >::getNodeLabel(), llvm::DOTGraphTraits< DOTMachineFuncInfo * >::getNodeLabel(), llvm::DOTGraphTraits< DomTreeNode * >::getNodeLabel(), llvm::DOTGraphTraits< RegionNode * >::getNodeLabel(), llvm::X86TargetLowering::shouldReduceLoadWidth(), tryToFoldExtOfMaskedLoad(), llvm::MBFIWrapper::view(), and llvm::MachineBlockFrequencyInfo::view().
VL
are identical or some of them are UndefValue. Definition at line 504 of file SLPVectorizer.cpp.
|
static |
Checks if strided loads can be generated out of VL
loads with pointers PointerOps:
Definition at line 6787 of file SLPVectorizer.cpp.
References llvm::ArrayRef< T >::back(), DL, llvm::ArrayRef< T >::empty(), llvm::ArrayRef< T >::front(), llvm::getPointersDiff(), getWidenedType(), llvm::has_single_bit(), llvm::SmallSet< T, N, C >::insert(), llvm::TargetTransformInfo::isLegalStridedLoadStore(), MaxProfitableLoadStride, MinProfitableStridedLoads, Ptr, llvm::ArrayRef< T >::size(), and llvm::SmallSet< T, N, C >::size().
Referenced by llvm::slpvectorizer::BoUpSLP::canVectorizeLoads().
|
static |
Checks if the given value is actually an undefined constant vector.
Also, if the UseMask
is not empty, tries to check if the non-masked elements actually mask the insertelement buildvector, if any.
Definition at line 703 of file SLPVectorizer.cpp.
Referenced by generateKeySubkey(), llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics::getShallowScore(), isFixedVectorShuffle(), and performExtractsShuffleAction().
Predicate for the element types that the SLP vectorizer supports.
The most important thing to filter here are types which are invalid in LLVM vectors. We also filter target specific types which have absolutely no meaningful vectorization path such as x86_fp80 and ppc_f128. This just avoids spending time checking the cost model and realizing that they will be inevitably scalarized.
Definition at line 242 of file SLPVectorizer.cpp.
References llvm::Type::getScalarType(), llvm::Type::isPPC_FP128Ty(), llvm::Type::isX86_FP80Ty(), and SLPReVec.
Referenced by llvm::slpvectorizer::BoUpSLP::canMapToVector(), compareCmp(), llvm::slpvectorizer::BoUpSLP::findReusedOrderedScalars(), gatherPossiblyVectorizableLoads(), getFloorFullVectorNumberOfElements(), getFullVectorNumberOfElements(), llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics::getShallowScore(), and llvm::hasFullVectorsOrPowerOf2().
Checks if V
is one of vector-like instructions, i.e.
undef, insertelement/extractelement with constant indices for fixed vector type or extractvalue instruction.
Definition at line 428 of file SLPVectorizer.cpp.
References assert(), I, and isConstant().
Referenced by allSameBlock(), generateKeySubkey(), and getSameOpcode().
|
static |
Definition at line 25209 of file SLPVectorizer.cpp.
References I, llvm::PatternMatch::m_BinOp(), llvm::PatternMatch::m_FMaximum(), llvm::PatternMatch::m_FMaxNum(), llvm::PatternMatch::m_FMinimum(), llvm::PatternMatch::m_FMinNum(), llvm::PatternMatch::m_Value(), and llvm::PatternMatch::match().
Referenced by getNonPhiOperand(), and isReductionCandidate().
|
static |
Does the analysis of the provided shuffle masks and performs the requested actions on the vectors with the given shuffle masks.
It tries to do it in several steps.
Definition at line 15733 of file SLPVectorizer.cpp.
References llvm::SmallBitVector::all(), assert(), llvm::sampleprof::Base, buildUseMask(), I, Idx, isUndefVector(), llvm::PoisonMaskElem, and llvm::SmallBitVector::test().
|
static |
I
after propagating metadata from VL
only for instructions in VL
. Definition at line 18817 of file SLPVectorizer.cpp.
References llvm::propagateMetadata(), and llvm::SmallVectorTemplateBase< T, bool >::push_back().
|
static |
Reorders the given Order
according to the given Mask
.
Order
- is the original order of the scalars. Procedure transforms the provided order in accordance with the given Mask
. If the resulting Order
is just an identity order, Order
is cleared.
Definition at line 6061 of file SLPVectorizer.cpp.
References llvm::all_of(), assert(), llvm::SmallVectorImpl< T >::assign(), llvm::SmallVectorTemplateCommon< T, typename >::begin(), llvm::SmallVectorImpl< T >::clear(), llvm::Data, llvm::SmallVectorBase< Size_T >::empty(), llvm::SmallVectorTemplateCommon< T, typename >::end(), llvm::enumerate(), fixupOrderingIndices(), I, llvm::inversePermutation(), llvm::ShuffleVectorInst::isIdentityMask(), llvm::PoisonMaskElem, reorderReuses(), llvm::SmallVectorImpl< T >::resize(), and llvm::SmallVectorImpl< T >::swap().
Referenced by llvm::slpvectorizer::BoUpSLP::getReorderingData(), llvm::slpvectorizer::BoUpSLP::reorderBottomToTop(), and llvm::slpvectorizer::BoUpSLP::reorderTopToBottom().
|
static |
Reorders the given Reuses mask according to the given Mask.
Reuses contains the original mask for the scalars reused in the node. The procedure transforms this mask in accordance with the given Mask.
Definition at line 6047 of file SLPVectorizer.cpp.
References assert(), llvm::SmallVectorTemplateCommon< T, typename >::begin(), E, llvm::SmallVectorTemplateCommon< T, typename >::end(), I, llvm::PoisonMaskElem, llvm::SmallVectorBase< Size_T >::size(), and llvm::SmallVectorImpl< T >::swap().
Referenced by llvm::slpvectorizer::BoUpSLP::reorderBottomToTop(), and reorderOrder().
|
static |
Replicates the given Val VF times.
Definition at line 1793 of file SLPVectorizer.cpp.
References assert(), llvm::SmallVectorTemplateCommon< T, typename >::begin(), llvm::CallingConv::C, llvm::enumerate(), I, llvm::none_of(), and llvm::ArrayRef< T >::size().
Referenced by llvm::slpvectorizer::BoUpSLP::ShuffleCostEstimator::gather().
Print a short descriptor of the instruction bundle suitable for debug output.
Definition at line 460 of file SLPVectorizer.cpp.
References llvm::ArrayRef< T >::front(), Idx, OS, and llvm::ArrayRef< T >::size().
Referenced by llvm::slpvectorizer::BoUpSLP::getTreeCost().
STATISTIC | ( | NumVectorInstructions | , |
"Number of vector instructions generated" | |||
) |
|
static |
Definition at line 311 of file SLPVectorizer.cpp.
References llvm::enumerate(), I, and llvm::PoisonMaskElem.
|
static |
We could have an initial reduction that is not an add.
For example: r *= v1 + v2 + v3 + v4. In such a case, start looking for a tree rooted in the first '+'. Returns the new root if found, which may be nullptr if not an instruction.
Definition at line 25235 of file SLPVectorizer.cpp.
References assert(), llvm::User::getOperand(), LHS, and RHS.
|
static |
Checks that every instruction appears once in the list and, if not, packs them, building the ReuseShuffleIndices mask and mutating VL.
The list of unique scalars is extended by poison values to the whole register size.
Returns false if VL could not be uniquified, in which case VL is unchanged and ReuseShuffleIndices is empty.
Definition at line 10342 of file SLPVectorizer.cpp.
References llvm::all_of(), llvm::SmallVectorImpl< T >::append(), llvm::SmallVectorTemplateCommon< T, typename >::begin(), llvm::SmallVectorImpl< T >::clear(), llvm::dbgs(), llvm::SmallVectorImpl< T >::emplace_back(), llvm::SmallVectorTemplateCommon< T, typename >::end(), llvm::SmallVectorTemplateCommon< T, typename >::front(), llvm::PoisonValue::get(), getFullVectorNumberOfElements(), getSameOpcode(), getValueType(), llvm::hasFullVectorsOrPowerOf2(), isConstant(), LLVM_DEBUG, llvm::PoisonMaskElem, llvm::DenseMapBase< DerivedT, KeyT, ValueT, KeyInfoT, BucketT >::size(), llvm::SmallVectorBase< Size_T >::size(), llvm::DenseMapBase< DerivedT, KeyT, ValueT, KeyInfoT, BucketT >::try_emplace(), llvm::slpvectorizer::BoUpSLP::EdgeInfo::UserTE, and VectorizeNonPowerOf2.
|
static |
Returns the minimum number of elements that we will attempt to vectorize.
Definition at line 25538 of file SLPVectorizer.cpp.
References llvm::SmallVectorTemplateCommon< T, typename >::begin(), llvm::SmallVectorImpl< T >::clear(), llvm::dbgs(), llvm::SmallVectorBase< Size_T >::empty(), llvm::SmallVectorTemplateCommon< T, typename >::end(), End, llvm::SmallVectorTemplateCommon< T, typename >::front(), I, LLVM_DEBUG, llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::SmallVectorBase< Size_T >::size(), llvm::stable_sort(), and llvm::SmallVectorImpl< T >::swap().
Definition at line 217 of file SLPVectorizer.cpp.
|
static |
Definition at line 226 of file SLPVectorizer.cpp.
Maximum allowed number of operands in the PHI nodes.
Definition at line 233 of file SLPVectorizer.cpp.
|
static |
Referenced by isStridedLoad().
|
static |
Referenced by llvm::slpvectorizer::BoUpSLP::BoUpSLP().
|
static |
Referenced by llvm::slpvectorizer::BoUpSLP::getMaximumVF().
|
static |
Referenced by llvm::slpvectorizer::BoUpSLP::canVectorizeLoads(), and isStridedLoad().
|
static |
If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling regions to be handled.
Definition at line 230 of file SLPVectorizer.cpp.
|
static |
Referenced by llvm::slpvectorizer::BoUpSLP::BoUpSLP().
|
static |
|
static |
Referenced by llvm::slpvectorizer::BoUpSLP::findBestRootPair().
|
static |
Referenced by llvm::SLPVectorizerPass::runImpl().
|
static |
Limits the size of scheduling regions in a block.
It avoids long compile times for very large blocks where vector instructions are spread over a wide range. This limit is way higher than needed by real-world functions.
Referenced by llvm::slpvectorizer::BoUpSLP::getSpillCost().
|
static |
|
static |
|
static |
|
static |
|
static constexpr |
Definition at line 221 of file SLPVectorizer.cpp.
Referenced by llvm::slpvectorizer::BoUpSLP::buildExternalUses(), llvm::slpvectorizer::BoUpSLP::computeMinimumValueSizes(), llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics::getShallowScore(), llvm::slpvectorizer::BoUpSLP::getTreeCost(), isCommutative(), and llvm::isUsedOutsideBlock().
|
static |
Enables vectorization of copyable elements.
|
static |
Referenced by llvm::slpvectorizer::BoUpSLP::getReorderingData(), and tryToFindDuplicates().
|
static |
Referenced by llvm::slpvectorizer::BoUpSLP::getTreeCost().