LLVM: lib/Transforms/Vectorize/LoopVectorizationPlanner.h Source File

//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

///

/// \file

/// This file provides a LoopVectorizationPlanner class.

/// InnerLoopVectorizer vectorizes loops which contain only one basic block.

/// LoopVectorizationPlanner - drives the vectorization process after having

/// passed Legality checks.

/// The planner builds and optimizes the Vectorization Plans which record the

/// decisions on how to vectorize the given loop. In particular, they represent the

/// control-flow of the vectorized version, the replication of instructions that

/// are to be scalarized, and interleave access groups.

///

/// Also provides a VPlan-based builder utility analogous to IRBuilder.

/// It provides an instruction-level API for generating VPInstructions while

/// abstracting away the Recipe manipulation details.

//===----------------------------------------------------------------------===//


#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H

#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H


#include "VPlan.h"

#include "llvm/ADT/SmallSet.h"

#include "llvm/Support/InstructionCost.h"


namespace llvm {


// Forward declarations of types that this header refers to only by pointer or
// reference.
class LoopInfo;

class DominatorTree;

class LoopVectorizationLegality;

class LoopVectorizationCostModel;

class PredicatedScalarEvolution;

class LoopVectorizeHints;

class OptimizationRemarkEmitter;

class TargetTransformInfo;

class TargetLibraryInfo;

class VPRecipeBuilder;

struct VFRange;


/// VPlan-based builder utility analogous to IRBuilder.

/// VPlan-based builder utility analogous to IRBuilder.
///
/// The builder tracks an optional insertion position: a block (BB) plus an
/// iterator into it (InsertPt). Recipes made through the create* helpers are
/// inserted at that position when a block is set; when no block is set they
/// are returned without being inserted anywhere.
class VPBuilder {
  /// Block that receives new recipes, or nullptr if no insertion point is set.
  VPBasicBlock *BB = nullptr;
  /// Position inside BB that is passed to VPBasicBlock::insert.
  VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();

  /// Insert \p R in BB at InsertPt if BB is set. Returns \p R in either case.
  template <typename T> T *tryInsertInstruction(T *R) {
    if (BB)
      BB->insert(R, InsertPt);
    return R;
  }

  /// Create a VPInstruction with \p Opcode, \p Operands, \p DL and \p Name,
  /// and insert it at the current insertion point if one is set.
  VPInstruction *createInstruction(unsigned Opcode,
                                   ArrayRef<VPValue *> Operands, DebugLoc DL,
                                   const Twine &Name = "") {
    return tryInsertInstruction(new VPInstruction(Opcode, Operands, DL, Name));
  }

  /// Overload taking the operands as a braced list; forwards to the ArrayRef
  /// version above.
  VPInstruction *createInstruction(unsigned Opcode,
                                   std::initializer_list<VPValue *> Operands,
                                   DebugLoc DL, const Twine &Name = "") {
    return createInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name);
  }

public:
  /// Create a builder without an insertion point.
  VPBuilder() = default;
  /// Create a builder that appends to the end of \p InsertBB.
  VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
  /// Create a builder whose insertion point is the position of \p InsertPt.
  VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
  /// Create a builder that inserts into \p TheBB at \p IP.
  VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
    setInsertPoint(TheBB, IP);
  }

  /// Clear the insertion point: created instructions will not be inserted into
  /// a block.
  void clearInsertionPoint() {
    BB = nullptr;
    InsertPt = VPBasicBlock::iterator();
  }

  /// Return the current insertion block, or nullptr if none is set.
  VPBasicBlock *getInsertBlock() const { return BB; }
  /// Return the current insertion iterator.
  VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }

  /// Create a VPBuilder to insert after \p R.
  static VPBuilder getToInsertAfter(VPRecipeBase *R) {
    VPBuilder B;
    B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
    return B;
  }

  /// VPInsertPoint - A saved insertion point.
  class VPInsertPoint {
    VPBasicBlock *Block = nullptr;
    VPBasicBlock::iterator Point;

  public:
    /// Creates a new insertion point which doesn't point to anything.
    VPInsertPoint() = default;

    /// Creates a new insertion point at the given location.
    VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
        : Block(InsertBlock), Point(InsertPoint) {}

    /// Returns true if this insert point is set.
    bool isSet() const { return Block != nullptr; }

    /// Returns the saved block (nullptr if this insert point is not set).
    VPBasicBlock *getBlock() const { return Block; }
    /// Returns the saved position within the block.
    VPBasicBlock::iterator getPoint() const { return Point; }
  };

  /// Sets the current insert point to a previously-saved location. An unset
  /// \p IP clears the insertion point.
  void restoreIP(VPInsertPoint IP) {
    if (IP.isSet())
      setInsertPoint(IP.getBlock(), IP.getPoint());
    else
      clearInsertionPoint();
  }

  /// This specifies that created VPInstructions should be appended to the end
  /// of the specified block.
  void setInsertPoint(VPBasicBlock *TheBB) {
    assert(TheBB && "Attempting to set a null insert point");
    BB = TheBB;
    InsertPt = BB->end();
  }

  /// This specifies that created instructions should be inserted at the
  /// specified point.
  void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
    BB = TheBB;
    InsertPt = IP;
  }

  /// This specifies that created instructions should be inserted at the
  /// position of \p IP, which must not be null.
  void setInsertPoint(VPRecipeBase *IP) {
    // Mirror the null check of the block-based overload; IP is dereferenced
    // unconditionally below.
    assert(IP && "Attempting to set a null insert point");
    BB = IP->getParent();
    InsertPt = IP->getIterator();
  }

  /// Insert \p R at the current insertion point. Unlike the create* helpers,
  /// this requires an insertion block to be set.
  void insert(VPRecipeBase *R) {
    assert(BB && "Cannot insert a recipe without an insertion point");
    BB->insert(R, InsertPt);
  }

  /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
  /// its underlying Instruction. The debug location is taken from \p Inst when
  /// it is non-null and is default-constructed otherwise.
  VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
                              Instruction *Inst = nullptr,
                              const Twine &Name = "") {
    DebugLoc DL;
    if (Inst)
      DL = Inst->getDebugLoc();
    VPInstruction *NewVPInst = createInstruction(Opcode, Operands, DL, Name);
    NewVPInst->setUnderlyingValue(Inst);
    return NewVPInst;
  }
  /// Create an N-ary operation with \p Opcode and \p Operands using the
  /// explicit debug location \p DL.
  VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
                              DebugLoc DL, const Twine &Name = "") {
    return createInstruction(Opcode, Operands, DL, Name);
  }
  /// Create an N-ary operation with \p Opcode and \p Operands. If \p FMFs is
  /// provided, the fast-math flags are passed on to the new VPInstruction.
  VPInstruction *createNaryOp(unsigned Opcode,
                              std::initializer_list<VPValue *> Operands,
                              std::optional<FastMathFlags> FMFs = {},
                              DebugLoc DL = {}, const Twine &Name = "") {
    if (FMFs)
      return tryInsertInstruction(
          new VPInstruction(Opcode, Operands, *FMFs, DL, Name));
    return createInstruction(Opcode, Operands, DL, Name);
  }

  /// Create a VPInstruction with \p Opcode and \p Operands that carries the
  /// wrap flags \p WrapFlags.
  VPInstruction *createOverflowingOp(unsigned Opcode,
                                     std::initializer_list<VPValue *> Operands,
                                     VPRecipeWithIRFlags::WrapFlagsTy WrapFlags,
                                     DebugLoc DL = {}, const Twine &Name = "") {
    return tryInsertInstruction(
        new VPInstruction(Opcode, Operands, WrapFlags, DL, Name));
  }

  /// Create a VPInstruction::Not of \p Operand.
  VPValue *createNot(VPValue *Operand, DebugLoc DL = {},
                     const Twine &Name = "") {
    return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
  }

  /// Create a binary And of \p LHS and \p RHS.
  VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
                     const Twine &Name = "") {
    return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
  }

  /// Create a binary Or of \p LHS and \p RHS, constructed with
  /// DisjointFlagsTy(false).
  VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
                    const Twine &Name = "") {
    return tryInsertInstruction(new VPInstruction(
        Instruction::BinaryOps::Or, {LHS, RHS},
        VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
  }

  /// Create a VPInstruction::LogicalAnd of \p LHS and \p RHS.
  VPValue *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
                            const Twine &Name = "") {
    return tryInsertInstruction(
        new VPInstruction(VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name));
  }

  /// Create a select between \p TrueVal and \p FalseVal on \p Cond. If \p FMFs
  /// is provided, the fast-math flags are passed on to the new VPInstruction.
  VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
                        DebugLoc DL = {}, const Twine &Name = "",
                        std::optional<FastMathFlags> FMFs = std::nullopt) {
    auto *Select =
        FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
                                 *FMFs, DL, Name)
             : new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
                                 DL, Name);
    return tryInsertInstruction(Select);
  }

  /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
  /// and \p B. \p Pred must be an integer-compare predicate.
  /// TODO: add createFCmp when needed.
  VPValue *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
                      DebugLoc DL = {}, const Twine &Name = "") {
    assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&
           Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
    return tryInsertInstruction(
        new VPInstruction(Instruction::ICmp, Pred, A, B, DL, Name));
  }

  /// Create a pointer-add of \p Ptr and \p Offset with GEPNoWrapFlags::none().
  VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
                              const Twine &Name = "") {
    return tryInsertInstruction(
        new VPInstruction(Ptr, Offset, GEPNoWrapFlags::none(), DL, Name));
  }
  /// Create a pointer-add of \p Ptr and \p Offset with
  /// GEPNoWrapFlags::inBounds().
  VPValue *createInBoundsPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
                                const Twine &Name = "") {
    return tryInsertInstruction(
        new VPInstruction(Ptr, Offset, GEPNoWrapFlags::inBounds(), DL, Name));
  }

  /// Convert the input value \p Current to the corresponding value of an
  /// induction with \p Start and \p Step values, using \p Start + \p Current *
  /// \p Step.
  VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind,
                                     FPMathOperator *FPBinOp, VPValue *Start,
                                     VPValue *Current, VPValue *Step,
                                     const Twine &Name = "") {
    return tryInsertInstruction(
        new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
  }

  /// Create a VPScalarCastRecipe applying \p Opcode to \p Op with result type
  /// \p ResultTy.
  VPScalarCastRecipe *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
                                       Type *ResultTy, DebugLoc DL) {
    return tryInsertInstruction(
        new VPScalarCastRecipe(Opcode, Op, ResultTy, DL));
  }

  /// Create a VPWidenCastRecipe applying \p Opcode to \p Op with result type
  /// \p ResultTy.
  VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
                                     Type *ResultTy) {
    return tryInsertInstruction(new VPWidenCastRecipe(Opcode, Op, ResultTy));
  }

  /// Create a VPScalarIVStepsRecipe for \p IV and \p Step using
  /// \p InductionOpcode. The fast-math flags come from \p FPBinOp when it is
  /// non-null and are default-constructed otherwise.
  VPScalarIVStepsRecipe *
  createScalarIVSteps(Instruction::BinaryOps InductionOpcode,
                      FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step) {
    return tryInsertInstruction(new VPScalarIVStepsRecipe(
        IV, Step, InductionOpcode,
        FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags()));
  }

  //===--------------------------------------------------------------------===//
  // RAII helpers.
  //===--------------------------------------------------------------------===//

  /// RAII object that stores the current insertion point and restores it when
  /// the object is destroyed.
  class InsertPointGuard {
    VPBuilder &Builder;
    VPBasicBlock *Block;
    VPBasicBlock::iterator Point;

  public:
    /// Record the insertion block and position that \p B has right now.
    InsertPointGuard(VPBuilder &B)
        : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}

    InsertPointGuard(const InsertPointGuard &) = delete;
    InsertPointGuard &operator=(const InsertPointGuard &) = delete;

    /// Hand the recorded position back to the builder via restoreIP.
    ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
  };
};


/// TODO: The following VectorizationFactor was pulled out of

/// LoopVectorizationCostModel class. LV also deals with

/// VectorizerParams::VectorizationFactor.

/// We need to streamline them.


/// Information about vectorization costs.

struct VectorizationFactor {

  /// Vector width with best cost.

  ElementCount Width;


  /// Cost of the loop with that width.

  InstructionCost Cost;


  /// Cost of the scalar loop.

  InstructionCost ScalarCost;


  /// The minimum trip count required to make vectorization profitable, e.g. due

  /// to runtime checks.

  ElementCount MinProfitableTripCount;


  VectorizationFactor(ElementCount Width, InstructionCost Cost,

                      InstructionCost ScalarCost)

      : Width(Width), Cost(Cost), ScalarCost(ScalarCost) {}


  /// Width 1 means no vectorization, cost 0 means uncomputed cost.

  static VectorizationFactor Disabled() {

    return {ElementCount::getFixed(1), 0, 0};

  }


  bool operator==(const VectorizationFactor &rhs) const {

    return Width == rhs.Width && Cost == rhs.Cost;

  }


  bool operator!=(const VectorizationFactor &rhs) const {

    return !(*this == rhs);

  }

};


/// A class that represents two vectorization factors (initialized with 0 by

/// default). One for fixed-width vectorization and one for scalable

/// vectorization. This can be used by the vectorizer to choose from a range of

/// fixed and/or scalable VFs in order to find the most cost-effective VF to

/// vectorize with.

struct FixedScalableVFPair {
  ElementCount FixedVF;
  ElementCount ScalableVF;

  /// Both the fixed and the scalable factor start out as 0.
  FixedScalableVFPair()
      : FixedVF(ElementCount::getFixed(0)),
        ScalableVF(ElementCount::getScalable(0)) {}

  /// Store \p Max in the member that matches its scalability; the other member
  /// keeps its default of 0.
  FixedScalableVFPair(const ElementCount &Max) : FixedScalableVFPair() {
    if (Max.isScalable())
      ScalableVF = Max;
    else
      FixedVF = Max;
  }

  /// Set both factors explicitly. \p FixedVF must be fixed-width and
  /// \p ScalableVF must be scalable.
  FixedScalableVFPair(const ElementCount &FixedVF,
                      const ElementCount &ScalableVF)
      : FixedVF(FixedVF), ScalableVF(ScalableVF) {
    assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
           "Invalid scalable properties");
  }

  /// \return a default-constructed pair, i.e. both factors are 0.
  static FixedScalableVFPair getNone() { return {}; }

  /// \return true if either fixed- or scalable VF is non-zero.
  explicit operator bool() const { return FixedVF || ScalableVF; }

  /// \return true if either fixed- or scalable VF is a valid vector VF.
  bool hasVector() const {
    if (FixedVF.isVector())
      return true;
    return ScalableVF.isVector();
  }
};


/// Planner drives the vectorization process after having passed

/// Legality checks.

class LoopVectorizationPlanner {

  /// The loop that we evaluate.

  Loop *OrigLoop;


  /// Loop Info analysis.

  LoopInfo *LI;


  /// The dominator tree.

  DominatorTree *DT;


  /// Target Library Info.

  const TargetLibraryInfo *TLI;


  /// Target Transform Info.

  const TargetTransformInfo &TTI;


  /// The legality analysis.

  LoopVectorizationLegality *Legal;


  /// The profitability analysis.

  LoopVectorizationCostModel &CM;


  /// The interleaved access analysis.

  InterleavedAccessInfo &IAI;


  PredicatedScalarEvolution &PSE;


  const LoopVectorizeHints &Hints;


  OptimizationRemarkEmitter *ORE;


  SmallVector<VPlanPtr, 4> VPlans;


  /// Profitable vector factors.

  SmallVector<VectorizationFactor, 8> ProfitableVFs;


  /// A builder used to construct the current plan.

  VPBuilder Builder;


  /// Computes the cost of \p Plan for vectorization factor \p VF.

  ///

  /// The current implementation requires access to the

  /// LoopVectorizationLegality to handle inductions and reductions, which is

  /// why it is kept separate from the VPlan-only cost infrastructure.

  ///

  /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has

  /// been retired.

  InstructionCost cost(VPlan &Plan, ElementCount VF) const;


  /// Precompute costs for certain instructions using the legacy cost model. The

  /// function is used to bring up the VPlan-based cost model to initially avoid

  /// taking different decisions due to inaccuracies in the legacy cost model.

  InstructionCost precomputeCosts(VPlan &Plan, ElementCount VF,

                                  VPCostContext &CostCtx) const;


public:

  LoopVectorizationPlanner(

      Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,

      const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal,

      LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI,

      PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints,

      OptimizationRemarkEmitter *ORE)

      : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),

        IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}


  /// Build VPlans for the specified \p UserVF and \p UserIC if they are

  /// non-zero or all applicable candidate VFs otherwise. If vectorization and

  /// interleaving should be avoided up-front, no plans are generated.

  void plan(ElementCount UserVF, unsigned UserIC);


  /// Use the VPlan-native path to plan how to best vectorize, return the best

  /// VF and its cost.

  VectorizationFactor planInVPlanNativePath(ElementCount UserVF);


  /// Return the VPlan for \p VF. At the moment, there is always a single VPlan

  /// for each VF.

  VPlan &getPlanFor(ElementCount VF) const;


  /// Compute and return the most profitable vectorization factor. Also collect

  /// all profitable VFs in ProfitableVFs.

  VectorizationFactor computeBestVF();


  /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan

  /// according to the best selected \p VF and  \p UF.

  ///

  /// TODO: \p VectorizingEpilogue indicates if the executed VPlan is for the

  /// epilogue vector loop. It should be removed once the re-use issue has been

  /// fixed.

  /// \p ExpandedSCEVs is passed during execution of the plan for epilogue loop

  /// to re-use expansion results generated during main plan execution.

  ///

  /// Returns a mapping of SCEVs to their expanded IR values.

  /// Note that this is a temporary workaround needed due to the current

  /// epilogue handling.

  DenseMap<const SCEV *, Value *>

  executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,

              InnerLoopVectorizer &LB, DominatorTree *DT,

              bool VectorizingEpilogue,

              const DenseMap<const SCEV *, Value *> *ExpandedSCEVs = nullptr);


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  void printPlans(raw_ostream &O);

#endif


  /// Look through the existing plans and return true if we have one with

  /// vectorization factor \p VF.

  bool hasPlanWithVF(ElementCount VF) const {

    return any_of(VPlans,

                  [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });

  }


  /// Test a \p Predicate on a \p Range of VF's. Return the value of applying

  /// \p Predicate on Range.Start, possibly decreasing Range.End such that the

  /// returned value holds for the entire \p Range.

  static bool

  getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,

                           VFRange &Range);


  /// \return The most profitable vectorization factor and the cost of that VF

  /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if

  /// epilogue vectorization is not supported for the loop.

  VectorizationFactor

  selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC);


  /// Emit remarks for recipes with invalid costs in the available VPlans.

  void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE);


protected:

  /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,

  /// according to the information gathered by Legal when it checked if it is

  /// legal to vectorize the loop.

  void buildVPlans(ElementCount MinVF, ElementCount MaxVF);


private:

  /// Build a VPlan according to the information gathered by Legal. \return a

  /// VPlan for vectorization factors \p Range.Start and up to \p Range.End

  /// exclusive, possibly decreasing \p Range.End.

  VPlanPtr buildVPlan(VFRange &Range);


  /// Build a VPlan using VPRecipes according to the information gather by

  /// Legal. This method is only used for the legacy inner loop vectorizer.

  /// \p Range's largest included VF is restricted to the maximum VF the

  /// returned VPlan is valid for. If no VPlan can be built for the input range,

  /// set the largest included VF to the maximum VF for which no plan could be

  /// built.

  VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range);


  /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,

  /// according to the information gathered by Legal when it checked if it is

  /// legal to vectorize the loop. This method creates VPlans using VPRecipes.

  void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);


  // Adjust the recipes for reductions. For in-loop reductions the chain of

  // instructions leading from the loop exit instr to the phi need to be

  // converted to reductions, with one operand being vector and the other being

  // the scalar reduction chain. For other reductions, a select is introduced

  // between the phi and users outside the vector region when folding the tail.

  void adjustRecipesForReductions(VPlanPtr &Plan,

                                  VPRecipeBuilder &RecipeBuilder,

                                  ElementCount MinVF);


#ifndef NDEBUG

  /// \return The most profitable vectorization factor for the available VPlans

  /// and the cost of that VF.

  /// This is now only used to verify the decisions by the new VPlan-based

  /// cost-model and will be retired once the VPlan-based cost-model is

  /// stabilized.

  VectorizationFactor selectVectorizationFactor();

#endif


  /// Returns true if the per-lane cost of VectorizationFactor A is lower than

  /// that of B.

  bool isMoreProfitable(const VectorizationFactor &A,

                        const VectorizationFactor &B) const;


  /// Returns true if the per-lane cost of VectorizationFactor A is lower than

  /// that of B in the context of vectorizing a loop with known \p MaxTripCount.

  bool isMoreProfitable(const VectorizationFactor &A,

                        const VectorizationFactor &B,

                        const unsigned MaxTripCount) const;


  /// Determines if we have the infrastructure to vectorize the loop and its

  /// epilogue, assuming the main loop is vectorized by \p VF.

  bool isCandidateForEpilogueVectorization(const ElementCount VF) const;

};


} // namespace llvm


#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H

Select
AMDGPU Register Bank Select
Definition: AMDGPURegBankSelect.cpp:71

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: ARMSLSHardening.cpp:73

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

Name
std::string Name
Definition: ELFObjHandler.cpp:77

InstructionCost.h
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...

Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:74

Range
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))

Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition: RISCVRedundantCopyElimination.cpp:75

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

SmallSet.h
This file defines the SmallSet class.

Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:77

VPlan.h
This file contains the declarations of the Vectorization Plan base classes:

RHS
Value * RHS
Definition: X86PartialReduction.cpp:74

LHS
Value * LHS
Definition: X86PartialReduction.cpp:73

IV
static const uint32_t IV[8]
Definition: blake3_impl.h:78

Predicate
Definition: AMDGPURegBankLegalizeRules.cpp:332

T

bool

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41

llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673

llvm::CmpInst::FIRST_ICMP_PREDICATE
@ FIRST_ICMP_PREDICATE
Definition: InstrTypes.h:704

llvm::CmpInst::LAST_ICMP_PREDICATE
@ LAST_ICMP_PREDICATE
Definition: InstrTypes.h:705

llvm::DWARFExpression::Operation
This class represents an Operation in the Expression.
Definition: DWARFExpression.h:32

llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33

llvm::DenseMap
Definition: DenseMap.h:727

llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162

llvm::ElementCount
Definition: TypeSize.h:300

llvm::ElementCount::isVector
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:326

llvm::ElementCount::getFixed
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311

llvm::FPMathOperator
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:205

llvm::FPMathOperator::getFastMathFlags
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Definition: Operator.h:338

llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20

llvm::GEPNoWrapFlags::inBounds
static GEPNoWrapFlags inBounds()
Definition: GEPNoWrapFlags.h:50

llvm::GEPNoWrapFlags::none
static GEPNoWrapFlags none()
Definition: GEPNoWrapFlags.h:46

llvm::InductionDescriptor::InductionKind
InductionKind
This enum represents the kinds of inductions that we support.
Definition: IVDescriptors.h:337

llvm::InnerLoopVectorizer
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
Definition: LoopVectorize.cpp:477

llvm::InstructionCost
Definition: InstructionCost.h:29

llvm::Instruction
Definition: Instruction.h:68

llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:1008

llvm::Instruction::CastOps
CastOps
Definition: Instruction.h:1022

llvm::InterleavedAccessInfo
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:630

llvm::LoopInfo
Definition: LoopInfo.h:407

llvm::LoopVectorizationCostModel
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Definition: LoopVectorize.cpp:977

llvm::LoopVectorizationLegality
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Definition: LoopVectorizationLegality.h:252

llvm::LoopVectorizationPlanner
Planner drives the vectorization process after having passed Legality checks.
Definition: LoopVectorizationPlanner.h:361

llvm::LoopVectorizationPlanner::selectEpilogueVectorizationFactor
VectorizationFactor selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC)
Definition: LoopVectorize.cpp:4688

llvm::LoopVectorizationPlanner::getPlanFor
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition: VPlan.cpp:1606

llvm::LoopVectorizationPlanner::LoopVectorizationPlanner
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE)
Definition: LoopVectorizationPlanner.h:417

llvm::LoopVectorizationPlanner::planInVPlanNativePath
VectorizationFactor planInVPlanNativePath(ElementCount UserVF)
Use the VPlan-native path to plan how to best vectorize, return the best VF and its cost.
Definition: LoopVectorize.cpp:7068

llvm::LoopVectorizationPlanner::buildVPlans
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
Definition: VPlan.cpp:1591

llvm::LoopVectorizationPlanner::computeBestVF
VectorizationFactor computeBestVF()
Compute and return the most profitable vectorization factor.
Definition: LoopVectorize.cpp:7481

llvm::LoopVectorizationPlanner::emitInvalidCostRemarks
void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE)
Emit remarks for recipes with invalid costs in the available VPlans.
Definition: LoopVectorize.cpp:4347

llvm::LoopVectorizationPlanner::getDecisionAndClampRange
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition: VPlan.cpp:1572

llvm::LoopVectorizationPlanner::printPlans
void printPlans(raw_ostream &O)
Definition: VPlan.cpp:1620

llvm::LoopVectorizationPlanner::plan
void plan(ElementCount UserVF, unsigned UserIC)
Build VPlans for the specified UserVF and UserIC if they are non-zero or all applicable candidate VFs...
Definition: LoopVectorize.cpp:7119

llvm::LoopVectorizationPlanner::executePlan
DenseMap< const SCEV *, Value * > executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, bool VectorizingEpilogue, const DenseMap< const SCEV *, Value * > *ExpandedSCEVs=nullptr)
Generate the IR code for the vectorized loop captured in VPlan BestPlan according to the best selecte...
Definition: LoopVectorize.cpp:7663

llvm::LoopVectorizationPlanner::hasPlanWithVF
bool hasPlanWithVF(ElementCount VF) const
Look through the existing plans and return true if we have one with vectorization factor VF.
Definition: LoopVectorizationPlanner.h:467

llvm::LoopVectorizeHints
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Definition: LoopVectorizationLegality.h:60

llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39

llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:32

llvm::PredicatedScalarEvolution
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
Definition: ScalarEvolution.h:2381

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196

llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:280

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:212

llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45

llvm::VPBasicBlock
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3200

llvm::VPBasicBlock::iterator
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:3227

llvm::VPBasicBlock::end
iterator end()
Definition: VPlan.h:3237

llvm::VPBasicBlock::insert
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3266

llvm::VPBuilder::InsertPointGuard
RAII object that stores the current insertion point and restores it when the object is destroyed.
Definition: LoopVectorizationPlanner.h:274

llvm::VPBuilder::InsertPointGuard::InsertPointGuard
InsertPointGuard(const InsertPointGuard &)=delete

llvm::VPBuilder::InsertPointGuard::InsertPointGuard
InsertPointGuard(VPBuilder &B)
Definition: LoopVectorizationPlanner.h:280

llvm::VPBuilder::InsertPointGuard::operator=
InsertPointGuard & operator=(const InsertPointGuard &)=delete

llvm::VPBuilder::InsertPointGuard::~InsertPointGuard
~InsertPointGuard()
Definition: LoopVectorizationPlanner.h:286

llvm::VPBuilder::VPInsertPoint
InsertPoint - A saved insertion point.
Definition: LoopVectorizationPlanner.h:95

llvm::VPBuilder::VPInsertPoint::VPInsertPoint
VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
Creates a new insertion point at the given location.
Definition: LoopVectorizationPlanner.h:104

llvm::VPBuilder::VPInsertPoint::getBlock
VPBasicBlock * getBlock() const
Definition: LoopVectorizationPlanner.h:110

llvm::VPBuilder::VPInsertPoint::getPoint
VPBasicBlock::iterator getPoint() const
Definition: LoopVectorizationPlanner.h:111

llvm::VPBuilder::VPInsertPoint::VPInsertPoint
VPInsertPoint()=default
Creates a new insertion point which doesn't point to anything.

llvm::VPBuilder::VPInsertPoint::isSet
bool isSet() const
Returns true if this insert point is set.
Definition: LoopVectorizationPlanner.h:108

llvm::VPBuilder
VPlan-based builder utility analogous to IRBuilder.
Definition: LoopVectorizationPlanner.h:46

llvm::VPBuilder::createICmp
VPValue * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL={}, const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
Definition: LoopVectorizationPlanner.h:219

llvm::VPBuilder::setInsertPoint
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
This specifies that created instructions should be inserted at the specified point.
Definition: LoopVectorizationPlanner.h:132

llvm::VPBuilder::setInsertPoint
void setInsertPoint(VPRecipeBase *IP)
This specifies that created instructions should be inserted at the specified point.
Definition: LoopVectorizationPlanner.h:139

llvm::VPBuilder::restoreIP
void restoreIP(VPInsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: LoopVectorizationPlanner.h:115

llvm::VPBuilder::createOr
VPValue * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL={}, const Twine &Name="")
Definition: LoopVectorizationPlanner.h:191

llvm::VPBuilder::getInsertBlock
VPBasicBlock * getInsertBlock() const
Definition: LoopVectorizationPlanner.h:84

llvm::VPBuilder::createDerivedIV
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
Definition: LoopVectorizationPlanner.h:241

llvm::VPBuilder::insert
void insert(VPRecipeBase *R)
Insert R at the current insertion point.
Definition: LoopVectorizationPlanner.h:145

llvm::VPBuilder::getInsertPoint
VPBasicBlock::iterator getInsertPoint() const
Definition: LoopVectorizationPlanner.h:85

llvm::VPBuilder::createPtrAdd
VPInstruction * createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL={}, const Twine &Name="")
Definition: LoopVectorizationPlanner.h:227

llvm::VPBuilder::createInBoundsPtrAdd
VPValue * createInBoundsPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL={}, const Twine &Name="")
Definition: LoopVectorizationPlanner.h:232

llvm::VPBuilder::VPBuilder
VPBuilder(VPBasicBlock *InsertBB)
Definition: LoopVectorizationPlanner.h:71

llvm::VPBuilder::createScalarCast
VPScalarCastRecipe * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL)
Definition: LoopVectorizationPlanner.h:249

llvm::VPBuilder::getToInsertAfter
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
Definition: LoopVectorizationPlanner.h:88

llvm::VPBuilder::createNaryOp
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition: LoopVectorizationPlanner.h:159

llvm::VPBuilder::createScalarIVSteps
VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step)
Definition: LoopVectorizationPlanner.h:261

llvm::VPBuilder::VPBuilder
VPBuilder(VPRecipeBase *InsertPt)
Definition: LoopVectorizationPlanner.h:72

llvm::VPBuilder::createOverflowingOp
VPInstruction * createOverflowingOp(unsigned Opcode, std::initializer_list< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags, DebugLoc DL={}, const Twine &Name="")
Definition: LoopVectorizationPlanner.h:173

llvm::VPBuilder::createWidenCast
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: LoopVectorizationPlanner.h:255

llvm::VPBuilder::createAnd
VPValue * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL={}, const Twine &Name="")
Definition: LoopVectorizationPlanner.h:186

llvm::VPBuilder::clearInsertionPoint
void clearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
Definition: LoopVectorizationPlanner.h:79

llvm::VPBuilder::createNaryOp
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Definition: LoopVectorizationPlanner.h:149

llvm::VPBuilder::createNot
VPValue * createNot(VPValue *Operand, DebugLoc DL={}, const Twine &Name="")
Definition: LoopVectorizationPlanner.h:181

llvm::VPBuilder::VPBuilder
VPBuilder()=default

llvm::VPBuilder::createNaryOp
VPInstruction * createNaryOp(unsigned Opcode, std::initializer_list< VPValue * > Operands, std::optional< FastMathFlags > FMFs={}, DebugLoc DL={}, const Twine &Name="")
Definition: LoopVectorizationPlanner.h:163

llvm::VPBuilder::createLogicalAnd
VPValue * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL={}, const Twine &Name="")
Definition: LoopVectorizationPlanner.h:199

llvm::VPBuilder::VPBuilder
VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
Definition: LoopVectorizationPlanner.h:73

llvm::VPBuilder::createSelect
VPValue * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL={}, const Twine &Name="", std::optional< FastMathFlags > FMFs=std::nullopt)
Definition: LoopVectorizationPlanner.h:205

llvm::VPBuilder::setInsertPoint
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
Definition: LoopVectorizationPlanner.h:124

llvm::VPDerivedIVRecipe
A recipe for converting the input IV value to the corresponding value of an IV with different s...
Definition: VPlan.h:3074

llvm::VPInstruction
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:845

llvm::VPInstruction::LogicalAnd
@ LogicalAnd
Definition: VPlan.h:874

llvm::VPInstruction::Not
@ Not
Definition: VPlan.h:854

llvm::VPRecipeBase
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:366

llvm::VPRecipeBase::getParent
VPBasicBlock * getParent()
Definition: VPlan.h:391

llvm::VPRecipeBuilder
Helper class to create VPRecipes from IR instructions.
Definition: VPRecipeBuilder.h:48

llvm::VPScalarCastRecipe
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1246

llvm::VPScalarIVStepsRecipe
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:3143

llvm::VPValue
Definition: VPlanValue.h:46

llvm::VPValue::setUnderlyingValue
void setUnderlyingValue(Value *Val)
Definition: VPlanValue.h:193

llvm::VPWidenCastRecipe
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition: VPlan.h:1194

llvm::VPlan
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition: VPlan.h:3476

llvm::details::FixedOrScalableQuantity::isScalable
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171

llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:132

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52

llvm::IRSimilarity::Legal
@ Legal
Definition: IRSimilarityIdentifier.h:76

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::Offset
@ Offset
Definition: DWP.cpp:480

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746

llvm::VPlanPtr
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:74

llvm::FixedScalableVFPair
A class that represents two vectorization factors (initialized with 0 by default).
Definition: LoopVectorizationPlanner.h:333

llvm::FixedScalableVFPair::FixedVF
ElementCount FixedVF
Definition: LoopVectorizationPlanner.h:334

llvm::FixedScalableVFPair::ScalableVF
ElementCount ScalableVF
Definition: LoopVectorizationPlanner.h:335

llvm::FixedScalableVFPair::FixedScalableVFPair
FixedScalableVFPair(const ElementCount &FixedVF, const ElementCount &ScalableVF)
Definition: LoopVectorizationPlanner.h:343

llvm::FixedScalableVFPair::FixedScalableVFPair
FixedScalableVFPair(const ElementCount &Max)
Definition: LoopVectorizationPlanner.h:340

llvm::FixedScalableVFPair::hasVector
bool hasVector() const
Definition: LoopVectorizationPlanner.h:356

llvm::FixedScalableVFPair::getNone
static FixedScalableVFPair getNone()
Definition: LoopVectorizationPlanner.h:350

llvm::FixedScalableVFPair::FixedScalableVFPair
FixedScalableVFPair()
Definition: LoopVectorizationPlanner.h:337

llvm::VFRange
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Definition: VPlanHelpers.h:62

llvm::VPCostContext
Struct to hold various analysis needed for cost computations.
Definition: VPlanHelpers.h:356

llvm::VPRecipeWithIRFlags::WrapFlagsTy
Definition: VPlan.h:590

llvm::VectorizationFactor
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Definition: LoopVectorizationPlanner.h:296

llvm::VectorizationFactor::Cost
InstructionCost Cost
Cost of the loop with that width.
Definition: LoopVectorizationPlanner.h:301

llvm::VectorizationFactor::MinProfitableTripCount
ElementCount MinProfitableTripCount
The minimum trip count required to make vectorization profitable.
Definition: LoopVectorizationPlanner.h:308

llvm::VectorizationFactor::operator==
bool operator==(const VectorizationFactor &rhs) const
Definition: LoopVectorizationPlanner.h:319

llvm::VectorizationFactor::Width
ElementCount Width
Vector width with best cost.
Definition: LoopVectorizationPlanner.h:298

llvm::VectorizationFactor::ScalarCost
InstructionCost ScalarCost
Cost of the scalar loop.
Definition: LoopVectorizationPlanner.h:304

llvm::VectorizationFactor::operator!=
bool operator!=(const VectorizationFactor &rhs) const
Definition: LoopVectorizationPlanner.h:323

llvm::VectorizationFactor::Disabled
static VectorizationFactor Disabled()
Width 1 means no vectorization, cost 0 means uncomputed cost.
Definition: LoopVectorizationPlanner.h:315

llvm::VectorizationFactor::VectorizationFactor
VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost)
Definition: LoopVectorizationPlanner.h:310