LLVM: lib/Transforms/Vectorize/VPlanRecipes.cpp Source File

//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

///

/// \file

/// This file contains implementations for different VPlan recipes.

///

//===----------------------------------------------------------------------===//


#include "LoopVectorizationPlanner.h"

#include "VPlan.h"

#include "VPlanAnalysis.h"

#include "VPlanHelpers.h"

#include "VPlanPatternMatch.h"

#include "VPlanUtils.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/ADT/Twine.h"

#include "llvm/Analysis/IVDescriptors.h"

#include "llvm/Analysis/LoopInfo.h"

#include "llvm/IR/BasicBlock.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/Intrinsics.h"

#include "llvm/IR/Type.h"

#include "llvm/IR/Value.h"

#include "llvm/IR/VectorBuilder.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/raw_ostream.h"

#include "llvm/Transforms/Utils/BasicBlockUtils.h"

#include "llvm/Transforms/Utils/LoopUtils.h"

#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

#include <cassert>


using namespace llvm;


using VectorParts = SmallVector<Value *, 2>;


namespace llvm {

extern cl::opt<bool> EnableVPlanNativePath;

}

extern cl::opt<unsigned> ForceTargetInstructionCost;


#define LV_NAME "loop-vectorize"

#define DEBUG_TYPE LV_NAME


bool VPRecipeBase::mayWriteToMemory() const {

  switch (getVPDefID()) {

  case VPInstructionSC:

    return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();

  case VPInterleaveSC:

    return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;

  case VPWidenStoreEVLSC:

  case VPWidenStoreSC:

    return true;

  case VPReplicateSC:

    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())

        ->mayWriteToMemory();

  case VPWidenCallSC:

    return !cast<VPWidenCallRecipe>(this)

                ->getCalledScalarFunction()

                ->onlyReadsMemory();

  case VPWidenIntrinsicSC:

    return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();

  case VPBranchOnMaskSC:

  case VPScalarIVStepsSC:

  case VPPredInstPHISC:

    return false;

  case VPBlendSC:

  case VPReductionEVLSC:

  case VPReductionSC:

  case VPVectorPointerSC:

  case VPWidenCanonicalIVSC:

  case VPWidenCastSC:

  case VPWidenGEPSC:

  case VPWidenIntOrFpInductionSC:

  case VPWidenLoadEVLSC:

  case VPWidenLoadSC:

  case VPWidenPHISC:

  case VPWidenSC:

  case VPWidenEVLSC:

  case VPWidenSelectSC: {

    const Instruction *I =

        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());

    (void)I;

    assert((!I || !I->mayWriteToMemory()) &&

           "underlying instruction may write to memory");

    return false;

  }

  default:

    return true;

  }

}


bool VPRecipeBase::mayReadFromMemory() const {

  switch (getVPDefID()) {

  case VPInstructionSC:

    return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();

  case VPWidenLoadEVLSC:

  case VPWidenLoadSC:

    return true;

  case VPReplicateSC:

    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())

        ->mayReadFromMemory();

  case VPWidenCallSC:

    return !cast<VPWidenCallRecipe>(this)

                ->getCalledScalarFunction()

                ->onlyWritesMemory();

  case VPWidenIntrinsicSC:

    return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();

  case VPBranchOnMaskSC:

  case VPPredInstPHISC:

  case VPScalarIVStepsSC:

  case VPWidenStoreEVLSC:

  case VPWidenStoreSC:

    return false;

  case VPBlendSC:

  case VPReductionEVLSC:

  case VPReductionSC:

  case VPVectorPointerSC:

  case VPWidenCanonicalIVSC:

  case VPWidenCastSC:

  case VPWidenGEPSC:

  case VPWidenIntOrFpInductionSC:

  case VPWidenPHISC:

  case VPWidenSC:

  case VPWidenEVLSC:

  case VPWidenSelectSC: {

    const Instruction *I =

        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());

    (void)I;

    assert((!I || !I->mayReadFromMemory()) &&

           "underlying instruction may read from memory");

    return false;

  }

  default:

    return true;

  }

}


bool VPRecipeBase::mayHaveSideEffects() const {

  switch (getVPDefID()) {

  case VPDerivedIVSC:

  case VPPredInstPHISC:

  case VPScalarCastSC:

  case VPReverseVectorPointerSC:

    return false;

  case VPInstructionSC:

    return mayWriteToMemory();

  case VPWidenCallSC: {

    Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();

    return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();

  }

  case VPWidenIntrinsicSC:

    return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();

  case VPBlendSC:

  case VPReductionEVLSC:

  case VPReductionSC:

  case VPScalarIVStepsSC:

  case VPVectorPointerSC:

  case VPWidenCanonicalIVSC:

  case VPWidenCastSC:

  case VPWidenGEPSC:

  case VPWidenIntOrFpInductionSC:

  case VPWidenPHISC:

  case VPWidenPointerInductionSC:

  case VPWidenSC:

  case VPWidenEVLSC:

  case VPWidenSelectSC: {

    const Instruction *I =

        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());

    (void)I;

    assert((!I || !I->mayHaveSideEffects()) &&

           "underlying instruction has side-effects");

    return false;

  }

  case VPInterleaveSC:

    return mayWriteToMemory();

  case VPWidenLoadEVLSC:

  case VPWidenLoadSC:

  case VPWidenStoreEVLSC:

  case VPWidenStoreSC:

    assert(

        cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==

            mayWriteToMemory() &&

        "mayHaveSideffects result for ingredient differs from this "

        "implementation");

    return mayWriteToMemory();

  case VPReplicateSC: {

    auto *R = cast<VPReplicateRecipe>(this);

    return R->getUnderlyingInstr()->mayHaveSideEffects();

  }

  default:

    return true;

  }

}


void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {

  assert(!Parent && "Recipe already in some VPBasicBlock");

  assert(InsertPos->getParent() &&

         "Insertion position not in any VPBasicBlock");

  InsertPos->getParent()->insert(this, InsertPos->getIterator());

}


void VPRecipeBase::insertBefore(VPBasicBlock &BB,

                                iplist<VPRecipeBase>::iterator I) {

  assert(!Parent && "Recipe already in some VPBasicBlock");

  assert(I == BB.end() || I->getParent() == &BB);

  BB.insert(this, I);

}


void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {

  assert(!Parent && "Recipe already in some VPBasicBlock");

  assert(InsertPos->getParent() &&

         "Insertion position not in any VPBasicBlock");

  InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));

}


void VPRecipeBase::removeFromParent() {

  assert(getParent() && "Recipe not in any VPBasicBlock");

  getParent()->getRecipeList().remove(getIterator());

  Parent = nullptr;

}


iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {

  assert(getParent() && "Recipe not in any VPBasicBlock");

  return getParent()->getRecipeList().erase(getIterator());

}


void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {

  removeFromParent();

  insertAfter(InsertPos);

}


void VPRecipeBase::moveBefore(VPBasicBlock &BB,

                              iplist<VPRecipeBase>::iterator I) {

  removeFromParent();

  insertBefore(BB, I);

}


InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {

  // Get the underlying instruction for the recipe, if there is one. It is used

  // to

  //   * decide if cost computation should be skipped for this recipe,

  //   * apply forced target instruction cost.

  Instruction *UI = nullptr;

  if (auto *S = dyn_cast<VPSingleDefRecipe>(this))

    UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());

  else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))

    UI = IG->getInsertPos();

  else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))

    UI = &WidenMem->getIngredient();


  InstructionCost RecipeCost;

  if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {

    RecipeCost = 0;

  } else {

    RecipeCost = computeCost(VF, Ctx);

    if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&

        RecipeCost.isValid())

      RecipeCost = InstructionCost(ForceTargetInstructionCost);

  }


  LLVM_DEBUG({

    dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";

    dump();

  });

  return RecipeCost;

}


InstructionCost VPRecipeBase::computeCost(ElementCount VF,

                                          VPCostContext &Ctx) const {

  llvm_unreachable("subclasses should implement computeCost");

}


InstructionCost

VPPartialReductionRecipe::computeCost(ElementCount VF,

                                      VPCostContext &Ctx) const {

  std::optional<unsigned> Opcode = std::nullopt;

  VPRecipeBase *BinOpR = getOperand(0)->getDefiningRecipe();

  if (auto *WidenR = dyn_cast<VPWidenRecipe>(BinOpR))

    Opcode = std::make_optional(WidenR->getOpcode());


  VPRecipeBase *ExtAR = BinOpR->getOperand(0)->getDefiningRecipe();

  VPRecipeBase *ExtBR = BinOpR->getOperand(1)->getDefiningRecipe();


  auto *PhiType = Ctx.Types.inferScalarType(getOperand(1));

  auto *InputTypeA = Ctx.Types.inferScalarType(ExtAR ? ExtAR->getOperand(0)

                                                     : BinOpR->getOperand(0));

  auto *InputTypeB = Ctx.Types.inferScalarType(ExtBR ? ExtBR->getOperand(0)

                                                     : BinOpR->getOperand(1));


  auto GetExtendKind = [](VPRecipeBase *R) {

    // The extend could come from outside the plan.

    if (!R)

      return TargetTransformInfo::PR_None;

    auto *WidenCastR = dyn_cast<VPWidenCastRecipe>(R);

    if (!WidenCastR)

      return TargetTransformInfo::PR_None;

    if (WidenCastR->getOpcode() == Instruction::CastOps::ZExt)

      return TargetTransformInfo::PR_ZeroExtend;

    if (WidenCastR->getOpcode() == Instruction::CastOps::SExt)

      return TargetTransformInfo::PR_SignExtend;

    return TargetTransformInfo::PR_None;

  };


  return Ctx.TTI.getPartialReductionCost(getOpcode(), InputTypeA, InputTypeB,

                                         PhiType, VF, GetExtendKind(ExtAR),

                                         GetExtendKind(ExtBR), Opcode);

}


void VPPartialReductionRecipe::execute(VPTransformState &State) {

  State.setDebugLocFrom(getDebugLoc());

  auto &Builder = State.Builder;


  assert(getOpcode() == Instruction::Add &&

         "Unhandled partial reduction opcode");


  Value *BinOpVal = State.get(getOperand(0));

  Value *PhiVal = State.get(getOperand(1));

  assert(PhiVal && BinOpVal && "Phi and Mul must be set");


  Type *RetTy = PhiVal->getType();


  CallInst *V = Builder.CreateIntrinsic(

      RetTy, Intrinsic::experimental_vector_partial_reduce_add,

      {PhiVal, BinOpVal}, nullptr, "partial.reduce");


  State.set(this, V);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPPartialReductionRecipe::print(raw_ostream &O, const Twine &Indent,

                                     VPSlotTracker &SlotTracker) const {

  O << Indent << "PARTIAL-REDUCE ";

  printAsOperand(O, SlotTracker);

  O << " = " << Instruction::getOpcodeName(getOpcode()) << " ";

  printOperands(O, SlotTracker);

}

#endif


FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {

  assert(OpType == OperationType::FPMathOp &&

         "recipe doesn't have fast math flags");

  FastMathFlags Res;

  Res.setAllowReassoc(FMFs.AllowReassoc);

  Res.setNoNaNs(FMFs.NoNaNs);

  Res.setNoInfs(FMFs.NoInfs);

  Res.setNoSignedZeros(FMFs.NoSignedZeros);

  Res.setAllowReciprocal(FMFs.AllowReciprocal);

  Res.setAllowContract(FMFs.AllowContract);

  Res.setApproxFunc(FMFs.ApproxFunc);

  return Res;

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPSingleDefRecipe::dump() const { VPDef::dump(); }

#endif


template <unsigned PartOpIdx>

VPValue *

VPUnrollPartAccessor<PartOpIdx>::getUnrollPartOperand(VPUser &U) const {

  if (U.getNumOperands() == PartOpIdx + 1)

    return U.getOperand(PartOpIdx);

  return nullptr;

}


template <unsigned PartOpIdx>

unsigned VPUnrollPartAccessor<PartOpIdx>::getUnrollPart(VPUser &U) const {

  if (auto *UnrollPartOp = getUnrollPartOperand(U))

    return cast<ConstantInt>(UnrollPartOp->getLiveInIRValue())->getZExtValue();

  return 0;

}


VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,

                             VPValue *A, VPValue *B, DebugLoc DL,

                             const Twine &Name)

    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),

                          Pred, DL),

      Opcode(Opcode), Name(Name.str()) {

  assert(Opcode == Instruction::ICmp &&

         "only ICmp predicates supported at the moment");

}


VPInstruction::VPInstruction(unsigned Opcode,

                             std::initializer_list<VPValue *> Operands,

                             FastMathFlags FMFs, DebugLoc DL, const Twine &Name)

    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),

      Opcode(Opcode), Name(Name.str()) {

  // Make sure the VPInstruction is a floating-point operation.

  assert(isFPMathOp() && "this op can't take fast-math flags");

}


bool VPInstruction::doesGeneratePerAllLanes() const {

  return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);

}


bool VPInstruction::canGenerateScalarForFirstLane() const {

  if (Instruction::isBinaryOp(getOpcode()))

    return true;

  if (isSingleScalar() || isVectorToScalar())

    return true;

  switch (Opcode) {

  case Instruction::ICmp:

  case Instruction::Select:

  case VPInstruction::BranchOnCond:

  case VPInstruction::BranchOnCount:

  case VPInstruction::CalculateTripCountMinusVF:

  case VPInstruction::CanonicalIVIncrementForPart:

  case VPInstruction::PtrAdd:

  case VPInstruction::ExplicitVectorLength:

  case VPInstruction::AnyOf:

    return true;

  default:

    return false;

  }

}


Value *VPInstruction::generatePerLane(VPTransformState &State,

                                      const VPLane &Lane) {

  IRBuilderBase &Builder = State.Builder;


  assert(getOpcode() == VPInstruction::PtrAdd &&

         "only PtrAdd opcodes are supported for now");

  return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),

                              State.get(getOperand(1), Lane), Name);

}


Value *VPInstruction::generate(VPTransformState &State) {

  IRBuilderBase &Builder = State.Builder;


  if (Instruction::isBinaryOp(getOpcode())) {

    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);

    Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);

    Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);

    auto *Res =

        Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);

    if (auto *I = dyn_cast<Instruction>(Res))

      setFlags(I);

    return Res;

  }


  switch (getOpcode()) {

  case VPInstruction::Not: {

    Value *A = State.get(getOperand(0));

    return Builder.CreateNot(A, Name);

  }

  case Instruction::ICmp: {

    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);

    Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);

    Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);

    return Builder.CreateCmp(getPredicate(), A, B, Name);

  }

  case Instruction::Select: {

    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);

    Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);

    Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);

    Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);

    return Builder.CreateSelect(Cond, Op1, Op2, Name);

  }

  case VPInstruction::ActiveLaneMask: {

    // Get first lane of vector induction variable.

    Value *VIVElem0 = State.get(getOperand(0), VPLane(0));

    // Get the original loop tripcount.

    Value *ScalarTC = State.get(getOperand(1), VPLane(0));


    // If this part of the active lane mask is scalar, generate the CMP directly

    // to avoid unnecessary extracts.

    if (State.VF.isScalar())

      return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,

                               Name);


    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());

    auto *PredTy = VectorType::get(Int1Ty, State.VF);

    return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,

                                   {PredTy, ScalarTC->getType()},

                                   {VIVElem0, ScalarTC}, nullptr, Name);

  }

  case VPInstruction::FirstOrderRecurrenceSplice: {

    // Generate code to combine the previous and current values in vector v3.

    //

    //   vector.ph:

    //     v_init = vector(..., ..., ..., a[-1])

    //     br vector.body

    //

    //   vector.body

    //     i = phi [0, vector.ph], [i+4, vector.body]

    //     v1 = phi [v_init, vector.ph], [v2, vector.body]

    //     v2 = a[i, i+1, i+2, i+3];

    //     v3 = vector(v1(3), v2(0, 1, 2))


    auto *V1 = State.get(getOperand(0));

    if (!V1->getType()->isVectorTy())

      return V1;

    Value *V2 = State.get(getOperand(1));

    return Builder.CreateVectorSplice(V1, V2, -1, Name);

  }

  case VPInstruction::CalculateTripCountMinusVF: {

    unsigned UF = getParent()->getPlan()->getUF();

    Value *ScalarTC = State.get(getOperand(0), VPLane(0));

    Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);

    Value *Sub = Builder.CreateSub(ScalarTC, Step);

    Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);

    Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);

    return Builder.CreateSelect(Cmp, Sub, Zero);

  }

  case VPInstruction::ExplicitVectorLength: {

    // TODO: Restructure this code with an explicit remainder loop, vsetvli can

    // be outside of the main loop.

    Value *AVL = State.get(getOperand(0), /*IsScalar*/ true);

    // Compute EVL

    assert(AVL->getType()->isIntegerTy() &&

           "Requested vector length should be an integer.");


    assert(State.VF.isScalable() && "Expected scalable vector factor.");

    Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());


    Value *EVL = State.Builder.CreateIntrinsic(

        State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,

        {AVL, VFArg, State.Builder.getTrue()});

    return EVL;

  }

  case VPInstruction::CanonicalIVIncrementForPart: {

    unsigned Part = getUnrollPart(*this);

    auto *IV = State.get(getOperand(0), VPLane(0));

    assert(Part != 0 && "Must have a positive part");

    // The canonical IV is incremented by the vectorization factor (num of

    // SIMD elements) times the unroll part.

    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);

    return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),

                             hasNoSignedWrap());

  }

  case VPInstruction::BranchOnCond: {

    Value *Cond = State.get(getOperand(0), VPLane(0));

    // Replace the temporary unreachable terminator with a new conditional

    // branch, hooking it up to backward destination for exiting blocks now and

    // to forward destination(s) later when they are created.

    BranchInst *CondBr =

        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);

    CondBr->setSuccessor(0, nullptr);

    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();


    if (!getParent()->isExiting())

      return CondBr;


    VPRegionBlock *ParentRegion = getParent()->getParent();

    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();

    CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);

    return CondBr;

  }

  case VPInstruction::BranchOnCount: {

    // First create the compare.

    Value *IV = State.get(getOperand(0), /*IsScalar*/ true);

    Value *TC = State.get(getOperand(1), /*IsScalar*/ true);

    Value *Cond = Builder.CreateICmpEQ(IV, TC);


    // Now create the branch.

    auto *Plan = getParent()->getPlan();

    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();

    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();


    // Replace the temporary unreachable terminator with a new conditional

    // branch, hooking it up to backward destination (the header) now and to the

    // forward destination (the exit/middle block) later when it is created.

    // Note that CreateCondBr expects a valid BB as first argument, so we need

    // to set it to nullptr later.

    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),

                                              State.CFG.VPBB2IRBB[Header]);

    CondBr->setSuccessor(0, nullptr);

    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();

    return CondBr;

  }

  case VPInstruction::ComputeReductionResult: {

    // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary

    // and will be removed by breaking up the recipe further.

    auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));

    auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());

    // Get its reduction variable descriptor.

    const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();


    RecurKind RK = RdxDesc.getRecurrenceKind();


    Type *PhiTy = OrigPhi->getType();

    // The recipe's operands are the reduction phi, followed by one operand for

    // each part of the reduction.

    unsigned UF = getNumOperands() - 1;

    VectorParts RdxParts(UF);

    for (unsigned Part = 0; Part < UF; ++Part)

      RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());


    // If the vector reduction can be performed in a smaller type, we truncate

    // then extend the loop exit value to enable InstCombine to evaluate the

    // entire expression in the smaller type.

    // TODO: Handle this in truncateToMinBW.

    if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {

      Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);

      for (unsigned Part = 0; Part < UF; ++Part)

        RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);

    }

    // Reduce all of the unrolled parts into a single vector.

    Value *ReducedPartRdx = RdxParts[0];

    unsigned Op = RdxDesc.getOpcode();

    if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))

      Op = Instruction::Or;


    if (PhiR->isOrdered()) {

      ReducedPartRdx = RdxParts[UF - 1];

    } else {

      // Floating-point operations should have some FMF to enable the reduction.

      IRBuilderBase::FastMathFlagGuard FMFG(Builder);

      Builder.setFastMathFlags(RdxDesc.getFastMathFlags());

      for (unsigned Part = 1; Part < UF; ++Part) {

        Value *RdxPart = RdxParts[Part];

        if (Op != Instruction::ICmp && Op != Instruction::FCmp)

          ReducedPartRdx = Builder.CreateBinOp(

              (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");

        else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))

          ReducedPartRdx =

              createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart);

        else

          ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);

      }

    }


    // Create the reduction after the loop. Note that inloop reductions create

    // the target reduction in the loop using a Reduction recipe.

    if ((State.VF.isVector() ||

         RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||

         RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) &&

        !PhiR->isInLoop()) {

      ReducedPartRdx =

          createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);

      // If the reduction can be performed in a smaller type, we need to extend

      // the reduction to the wider type before we branch to the original loop.

      if (PhiTy != RdxDesc.getRecurrenceType())

        ReducedPartRdx = RdxDesc.isSigned()

                             ? Builder.CreateSExt(ReducedPartRdx, PhiTy)

                             : Builder.CreateZExt(ReducedPartRdx, PhiTy);

    }


    return ReducedPartRdx;

  }

  case VPInstruction::ExtractFromEnd: {

    auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());

    unsigned Offset = CI->getZExtValue();

    assert(Offset > 0 && "Offset from end must be positive");

    Value *Res;

    if (State.VF.isVector()) {

      assert(Offset <= State.VF.getKnownMinValue() &&

             "invalid offset to extract from");

      // Extract lane VF - Offset from the operand.

      Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));

    } else {

      assert(Offset <= 1 && "invalid offset to extract from");

      Res = State.get(getOperand(0));

    }

    if (isa<ExtractElementInst>(Res))

      Res->setName(Name);

    return Res;

  }

  case VPInstruction::LogicalAnd: {

    Value *A = State.get(getOperand(0));

    Value *B = State.get(getOperand(1));

    return Builder.CreateLogicalAnd(A, B, Name);

  }

  case VPInstruction::PtrAdd: {

    assert(vputils::onlyFirstLaneUsed(this) &&

           "can only generate first lane for PtrAdd");

    Value *Ptr = State.get(getOperand(0), VPLane(0));

    Value *Addend = State.get(getOperand(1), VPLane(0));

    return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());

  }

  case VPInstruction::ResumePhi: {

    Value *IncomingFromVPlanPred =

        State.get(getOperand(0), /* IsScalar */ true);

    Value *IncomingFromOtherPreds =

        State.get(getOperand(1), /* IsScalar */ true);

    auto *NewPhi =

        Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name);

    BasicBlock *VPlanPred =

        State.CFG

            .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getPredecessors()[0])];

    NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);

    for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {

      if (OtherPred == VPlanPred)

        continue;

      NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);

    }

    return NewPhi;

  }

  case VPInstruction::AnyOf: {

    Value *A = State.get(getOperand(0));

    return Builder.CreateOrReduce(A);

  }

  case VPInstruction::ExtractFirstActive: {

    Value *Vec = State.get(getOperand(0));

    Value *Mask = State.get(getOperand(1));

    Value *Ctz = Builder.CreateCountTrailingZeroElems(

        Builder.getInt64Ty(), Mask, true, "first.active.lane");

    return Builder.CreateExtractElement(Vec, Ctz, "early.exit.value");

  }

  default:

    llvm_unreachable("Unsupported opcode for instruction");

  }

}


bool VPInstruction::isVectorToScalar() const {

  return getOpcode() == VPInstruction::ExtractFromEnd ||

         getOpcode() == VPInstruction::ExtractFirstActive ||

         getOpcode() == VPInstruction::ComputeReductionResult ||

         getOpcode() == VPInstruction::AnyOf;

}


bool VPInstruction::isSingleScalar() const {

  return getOpcode() == VPInstruction::ResumePhi;

}


#if !defined(NDEBUG)

bool VPInstruction::isFPMathOp() const {

  // Inspired by FPMathOperator::classof. Notable differences are that we don't

  // support Call, PHI and Select opcodes here yet.

  return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||

         Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||

         Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||

         Opcode == Instruction::FCmp || Opcode == Instruction::Select;

}

#endif


void VPInstruction::execute(VPTransformState &State) {

  assert(!State.Lane && "VPInstruction executing an Lane");

  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);

  assert((hasFastMathFlags() == isFPMathOp() ||

          getOpcode() == Instruction::Select) &&

         "Recipe not a FPMathOp but has fast-math flags?");

  if (hasFastMathFlags())

    State.Builder.setFastMathFlags(getFastMathFlags());

  State.setDebugLocFrom(getDebugLoc());

  bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&

                                   (vputils::onlyFirstLaneUsed(this) ||

                                    isVectorToScalar() || isSingleScalar());

  bool GeneratesPerAllLanes = doesGeneratePerAllLanes();

  if (GeneratesPerAllLanes) {

    for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();

         Lane != NumLanes; ++Lane) {

      Value *GeneratedValue = generatePerLane(State, VPLane(Lane));

      assert(GeneratedValue && "generatePerLane must produce a value");

      State.set(this, GeneratedValue, VPLane(Lane));

    }

    return;

  }


  Value *GeneratedValue = generate(State);

  if (!hasResult())

    return;

  assert(GeneratedValue && "generate must produce a value");

  assert(

      (GeneratedValue->getType()->isVectorTy() == !GeneratesPerFirstLaneOnly ||

       State.VF.isScalar()) &&

      "scalar value but not only first lane defined");

  State.set(this, GeneratedValue,

            /*IsScalar*/ GeneratesPerFirstLaneOnly);

}


bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {

  if (Instruction::isBinaryOp(getOpcode()))

    return false;

  switch (getOpcode()) {

  case Instruction::ICmp:

  case Instruction::Select:

  case VPInstruction::AnyOf:

  case VPInstruction::CalculateTripCountMinusVF:

  case VPInstruction::CanonicalIVIncrementForPart:

  case VPInstruction::ExtractFromEnd:

  case VPInstruction::ExtractFirstActive:

  case VPInstruction::FirstOrderRecurrenceSplice:

  case VPInstruction::LogicalAnd:

  case VPInstruction::Not:

  case VPInstruction::PtrAdd:

    return false;

  default:

    return true;

  }

}


bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {

  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");

  if (Instruction::isBinaryOp(getOpcode()))

    return vputils::onlyFirstLaneUsed(this);


  switch (getOpcode()) {

  default:

    return false;

  case Instruction::ICmp:

  case Instruction::Select:

  case Instruction::Or:

  case VPInstruction::PtrAdd:

    // TODO: Cover additional opcodes.

    return vputils::onlyFirstLaneUsed(this);

  case VPInstruction::ActiveLaneMask:

  case VPInstruction::ExplicitVectorLength:

  case VPInstruction::CalculateTripCountMinusVF:

  case VPInstruction::CanonicalIVIncrementForPart:

  case VPInstruction::BranchOnCount:

  case VPInstruction::BranchOnCond:

  case VPInstruction::ResumePhi:

    return true;

  };

  llvm_unreachable("switch should return");

}


bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const {

  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");

  if (Instruction::isBinaryOp(getOpcode()))

    return vputils::onlyFirstPartUsed(this);


  switch (getOpcode()) {

  default:

    return false;

  case Instruction::ICmp:

  case Instruction::Select:

    return vputils::onlyFirstPartUsed(this);

  case VPInstruction::BranchOnCount:

  case VPInstruction::BranchOnCond:

  case VPInstruction::CanonicalIVIncrementForPart:

    return true;

  };

  llvm_unreachable("switch should return");

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPInstruction::dump() const {

  VPSlotTracker SlotTracker(getParent()->getPlan());

  print(dbgs(), "", SlotTracker);

}


void VPInstruction::print(raw_ostream &O, const Twine &Indent,

                          VPSlotTracker &SlotTracker) const {

  O << Indent << "EMIT ";


  if (hasResult()) {

    printAsOperand(O, SlotTracker);

    O << " = ";

  }


  switch (getOpcode()) {

  case VPInstruction::Not:

    O << "not";

    break;

  case VPInstruction::SLPLoad:

    O << "combined load";

    break;

  case VPInstruction::SLPStore:

    O << "combined store";

    break;

  case VPInstruction::ActiveLaneMask:

    O << "active lane mask";

    break;

  case VPInstruction::ResumePhi:

    O << "resume-phi";

    break;

  case VPInstruction::ExplicitVectorLength:

    O << "EXPLICIT-VECTOR-LENGTH";

    break;

  case VPInstruction::FirstOrderRecurrenceSplice:

    O << "first-order splice";

    break;

  case VPInstruction::BranchOnCond:

    O << "branch-on-cond";

    break;

  case VPInstruction::CalculateTripCountMinusVF:

    O << "TC > VF ? TC - VF : 0";

    break;

  case VPInstruction::CanonicalIVIncrementForPart:

    O << "VF * Part +";

    break;

  case VPInstruction::BranchOnCount:

    O << "branch-on-count";

    break;

  case VPInstruction::ExtractFromEnd:

    O << "extract-from-end";

    break;

  case VPInstruction::ComputeReductionResult:

    O << "compute-reduction-result";

    break;

  case VPInstruction::LogicalAnd:

    O << "logical-and";

    break;

  case VPInstruction::PtrAdd:

    O << "ptradd";

    break;

  case VPInstruction::AnyOf:

    O << "any-of";

    break;

  case VPInstruction::ExtractFirstActive:

    O << "extract-first-active";

    break;

  default:

    O << Instruction::getOpcodeName(getOpcode());

  }


  printFlags(O);

  printOperands(O, SlotTracker);


  if (auto DL = getDebugLoc()) {

    O << ", !dbg ";

    DL.print(O);

  }

}

#endif


void VPIRInstruction::execute(VPTransformState &State) {

  assert((isa<PHINode>(&I) || getNumOperands() == 0) &&

         "Only PHINodes can have extra operands");

  for (const auto &[Idx, Op] : enumerate(operands())) {

    VPValue *ExitValue = Op;

    auto Lane = vputils::isUniformAfterVectorization(ExitValue)

                    ? VPLane::getFirstLane()

                    : VPLane::getLastLaneForVF(State.VF);

    VPBlockBase *Pred = getParent()->getPredecessors()[Idx];

    auto *PredVPBB = Pred->getExitingBasicBlock();

    BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];

    // Set insertion point in PredBB in case an extract needs to be generated.

    // TODO: Model extracts explicitly.

    State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());

    Value *V = State.get(ExitValue, VPLane(Lane));

    auto *Phi = cast<PHINode>(&I);

    // If there is no existing block for PredBB in the phi, add a new incoming

    // value. Otherwise update the existing incoming value for PredBB.

    if (Phi->getBasicBlockIndex(PredBB) == -1)

      Phi->addIncoming(V, PredBB);

    else

      Phi->setIncomingValueForBlock(PredBB, V);

  }


  // Advance the insert point after the wrapped IR instruction. This allows

  // interleaving VPIRInstructions and other recipes.

  State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));

}


InstructionCost VPIRInstruction::computeCost(ElementCount VF,

                                             VPCostContext &Ctx) const {

  // The recipe wraps an existing IR instruction on the border of VPlan's scope,

  // hence it does not contribute to the cost-modeling for the VPlan.

  return 0;

}


void VPIRInstruction::extractLastLaneOfOperand(VPBuilder &Builder) {

  assert(isa<PHINode>(getInstruction()) &&

         "can only add exiting operands to phi nodes");

  assert(getNumOperands() == 1 && "must have a single operand");

  VPValue *Exiting = getOperand(0);

  if (!Exiting->isLiveIn()) {

    LLVMContext &Ctx = getInstruction().getContext();

    auto &Plan = *getParent()->getPlan();

    Exiting = Builder.createNaryOp(

        VPInstruction::ExtractFromEnd,

        {Exiting,

         Plan.getOrAddLiveIn(ConstantInt::get(IntegerType::get(Ctx, 32), 1))});

  }

  setOperand(0, Exiting);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPIRInstruction::print(raw_ostream &O, const Twine &Indent,

                            VPSlotTracker &SlotTracker) const {

  O << Indent << "IR " << I;


  if (getNumOperands() != 0) {

    O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";

    interleaveComma(

        enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) {

          Op.value()->printAsOperand(O, SlotTracker);

          O << " from ";

          getParent()->getPredecessors()[Op.index()]->printAsOperand(O);

        });

    O << ")";

  }

}

#endif


void VPWidenCallRecipe::execute(VPTransformState &State) {

  assert(State.VF.isVector() && "not widening");

  State.setDebugLocFrom(getDebugLoc());


  FunctionType *VFTy = Variant->getFunctionType();

  // Add return type if intrinsic is overloaded on it.

  SmallVector<Value *, 4> Args;

  for (const auto &I : enumerate(arg_operands())) {

    Value *Arg;

    // Some vectorized function variants may also take a scalar argument,

    // e.g. linear parameters for pointers. This needs to be the scalar value

    // from the start of the respective part when interleaving.

    if (!VFTy->getParamType(I.index())->isVectorTy())

      Arg = State.get(I.value(), VPLane(0));

    else

      Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));

    Args.push_back(Arg);

  }


  assert(Variant != nullptr && "Can't create vector function.");


  auto *CI = cast_or_null<CallInst>(getUnderlyingValue());

  SmallVector<OperandBundleDef, 1> OpBundles;

  if (CI)

    CI->getOperandBundlesAsDefs(OpBundles);


  CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles);

  setFlags(V);


  if (!V->getType()->isVoidTy())

    State.set(this, V);

  State.addMetadata(V, CI);

}


InstructionCost VPWidenCallRecipe::computeCost(ElementCount VF,

                                               VPCostContext &Ctx) const {

  return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(),

                                  Variant->getFunctionType()->params(),

                                  Ctx.CostKind);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,

                              VPSlotTracker &SlotTracker) const {

  O << Indent << "WIDEN-CALL ";


  Function *CalledFn = getCalledScalarFunction();

  if (CalledFn->getReturnType()->isVoidTy())

    O << "void ";

  else {

    printAsOperand(O, SlotTracker);

    O << " = ";

  }


  O << "call";

  printFlags(O);

  O << " @" << CalledFn->getName() << "(";

  interleaveComma(arg_operands(), O, [&O, &SlotTracker](VPValue *Op) {

    Op->printAsOperand(O, SlotTracker);

  });

  O << ")";


  O << " (using library function";

  if (Variant->hasName())

    O << ": " << Variant->getName();

  O << ")";

}

#endif


void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {

  assert(State.VF.isVector() && "not widening");

  State.setDebugLocFrom(getDebugLoc());


  SmallVector<Type *, 2> TysForDecl;

  // Add return type if intrinsic is overloaded on it.

  if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))

    TysForDecl.push_back(VectorType::get(getResultType(), State.VF));

  SmallVector<Value *, 4> Args;

  for (const auto &I : enumerate(operands())) {

    // Some intrinsics have a scalar argument - don't replace it with a

    // vector.

    Value *Arg;

    if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(),

                                           State.TTI))

      Arg = State.get(I.value(), VPLane(0));

    else

      Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));

    if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(),

                                               State.TTI))

      TysForDecl.push_back(Arg->getType());

    Args.push_back(Arg);

  }


  // Use vector version of the intrinsic.

  Module *M = State.Builder.GetInsertBlock()->getModule();

  Function *VectorF =

      Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);

  assert(VectorF &&

         "Can't retrieve vector intrinsic or vector-predication intrinsics.");


  auto *CI = cast_or_null<CallInst>(getUnderlyingValue());

  SmallVector<OperandBundleDef, 1> OpBundles;

  if (CI)

    CI->getOperandBundlesAsDefs(OpBundles);


  CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);


  setFlags(V);


  if (!V->getType()->isVoidTy())

    State.set(this, V);

  State.addMetadata(V, CI);

}


InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,

                                                    VPCostContext &Ctx) const {

  // Some backends analyze intrinsic arguments to determine cost. Use the

  // underlying value for the operand if it has one. Otherwise try to use the

  // operand of the underlying call instruction, if there is one. Otherwise

  // clear Arguments.

  // TODO: Rework TTI interface to be independent of concrete IR values.

  SmallVector<const Value *> Arguments;

  for (const auto &[Idx, Op] : enumerate(operands())) {

    auto *V = Op->getUnderlyingValue();

    if (!V) {

      // Push all the VP Intrinsic's ops into the Argments even if is nullptr.

      // Some VP Intrinsic's cost will assert the number of parameters.

      // Mainly appears in the following two scenarios:

      // 1. EVL Op is nullptr

      // 2. The Argmunt of the VP Intrinsic is also the VP Intrinsic

      if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {

        Arguments.push_back(V);

        continue;

      }

      if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) {

        Arguments.push_back(UI->getArgOperand(Idx));

        continue;

      }

      Arguments.clear();

      break;

    }

    Arguments.push_back(V);

  }


  Type *RetTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);

  SmallVector<Type *> ParamTys;

  for (unsigned I = 0; I != getNumOperands(); ++I)

    ParamTys.push_back(

        toVectorTy(Ctx.Types.inferScalarType(getOperand(I)), VF));


  // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.

  FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();

  IntrinsicCostAttributes CostAttrs(

      VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF,

      dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()));

  return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, Ctx.CostKind);

}


StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const {

  return Intrinsic::getBaseName(VectorIntrinsicID);

}


bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {

  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");

  // Vector predication intrinsics only demand the the first lane the last

  // operand (the EVL operand).

  return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&

         Op == getOperand(getNumOperands() - 1);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPWidenIntrinsicRecipe::print(raw_ostream &O, const Twine &Indent,

                                   VPSlotTracker &SlotTracker) const {

  O << Indent << "WIDEN-INTRINSIC ";

  if (ResultTy->isVoidTy()) {

    O << "void ";

  } else {

    printAsOperand(O, SlotTracker);

    O << " = ";

  }


  O << "call";

  printFlags(O);

  O << getIntrinsicName() << "(";


  interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {

    Op->printAsOperand(O, SlotTracker);

  });

  O << ")";

}

#endif


void VPHistogramRecipe::execute(VPTransformState &State) {

  State.setDebugLocFrom(getDebugLoc());

  IRBuilderBase &Builder = State.Builder;


  Value *Address = State.get(getOperand(0));

  Value *IncAmt = State.get(getOperand(1), /*IsScalar=*/true);

  VectorType *VTy = cast<VectorType>(Address->getType());


  // The histogram intrinsic requires a mask even if the recipe doesn't;

  // if the mask operand was omitted then all lanes should be executed and

  // we just need to synthesize an all-true mask.

  Value *Mask = nullptr;

  if (VPValue *VPMask = getMask())

    Mask = State.get(VPMask);

  else

    Mask =

        Builder.CreateVectorSplat(VTy->getElementCount(), Builder.getInt1(1));


  // If this is a subtract, we want to invert the increment amount. We may

  // add a separate intrinsic in future, but for now we'll try this.

  if (Opcode == Instruction::Sub)

    IncAmt = Builder.CreateNeg(IncAmt);

  else

    assert(Opcode == Instruction::Add && "only add or sub supported for now");


  State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,

                                {VTy, IncAmt->getType()},

                                {Address, IncAmt, Mask});

}


InstructionCost VPHistogramRecipe::computeCost(ElementCount VF,

                                               VPCostContext &Ctx) const {

  // FIXME: Take the gather and scatter into account as well. For now we're

  //        generating the same cost as the fallback path, but we'll likely

  //        need to create a new TTI method for determining the cost, including

  //        whether we can use base + vec-of-smaller-indices or just

  //        vec-of-pointers.

  assert(VF.isVector() && "Invalid VF for histogram cost");

  Type *AddressTy = Ctx.Types.inferScalarType(getOperand(0));

  VPValue *IncAmt = getOperand(1);

  Type *IncTy = Ctx.Types.inferScalarType(IncAmt);

  VectorType *VTy = VectorType::get(IncTy, VF);


  // Assume that a non-constant update value (or a constant != 1) requires

  // a multiply, and add that into the cost.

  InstructionCost MulCost =

      Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy, Ctx.CostKind);

  if (IncAmt->isLiveIn()) {

    ConstantInt *CI = dyn_cast<ConstantInt>(IncAmt->getLiveInIRValue());


    if (CI && CI->getZExtValue() == 1)

      MulCost = TTI::TCC_Free;

  }


  // Find the cost of the histogram operation itself.

  Type *PtrTy = VectorType::get(AddressTy, VF);

  Type *MaskTy = VectorType::get(Type::getInt1Ty(Ctx.LLVMCtx), VF);

  IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add,

                              Type::getVoidTy(Ctx.LLVMCtx),

                              {PtrTy, IncTy, MaskTy});


  // Add the costs together with the add/sub operation.

  return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind) + MulCost +

         Ctx.TTI.getArithmeticInstrCost(Opcode, VTy, Ctx.CostKind);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPHistogramRecipe::print(raw_ostream &O, const Twine &Indent,

                              VPSlotTracker &SlotTracker) const {

  O << Indent << "WIDEN-HISTOGRAM buckets: ";

  getOperand(0)->printAsOperand(O, SlotTracker);


  if (Opcode == Instruction::Sub)

    O << ", dec: ";

  else {

    assert(Opcode == Instruction::Add);

    O << ", inc: ";

  }

  getOperand(1)->printAsOperand(O, SlotTracker);


  if (VPValue *Mask = getMask()) {

    O << ", mask: ";

    Mask->printAsOperand(O, SlotTracker);

  }

}


void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,

                                VPSlotTracker &SlotTracker) const {

  O << Indent << "WIDEN-SELECT ";

  printAsOperand(O, SlotTracker);

  O << " = select ";

  printFlags(O);

  getOperand(0)->printAsOperand(O, SlotTracker);

  O << ", ";

  getOperand(1)->printAsOperand(O, SlotTracker);

  O << ", ";

  getOperand(2)->printAsOperand(O, SlotTracker);

  O << (isInvariantCond() ? " (condition is loop invariant)" : "");

}

#endif


void VPWidenSelectRecipe::execute(VPTransformState &State) {

  State.setDebugLocFrom(getDebugLoc());


  // The condition can be loop invariant but still defined inside the

  // loop. This means that we can't just use the original 'cond' value.

  // We have to take the 'vectorized' value and pick the first lane.

  // Instcombine will make this a no-op.

  auto *InvarCond =

      isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr;


  Value *Cond = InvarCond ? InvarCond : State.get(getCond());

  Value *Op0 = State.get(getOperand(1));

  Value *Op1 = State.get(getOperand(2));

  Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);

  State.set(this, Sel);

  if (isa<FPMathOperator>(Sel))

    setFlags(cast<Instruction>(Sel));

  State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));

}


InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF,

                                                 VPCostContext &Ctx) const {

  SelectInst *SI = cast<SelectInst>(getUnderlyingValue());

  bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();

  Type *ScalarTy = Ctx.Types.inferScalarType(this);

  Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);


  VPValue *Op0, *Op1;

  using namespace llvm::VPlanPatternMatch;

  if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&

      (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||

       match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {

    // select x, y, false --> x & y

    // select x, true, y --> x | y

    const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);

    const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);


    SmallVector<const Value *, 2> Operands;

    if (all_of(operands(),

               [](VPValue *Op) { return Op->getUnderlyingValue(); }))

      Operands.append(SI->op_begin(), SI->op_end());

    bool IsLogicalOr = match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1)));

    return Ctx.TTI.getArithmeticInstrCost(

        IsLogicalOr ? Instruction::Or : Instruction::And, VectorTy,

        Ctx.CostKind, {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI);

  }


  Type *CondTy = Ctx.Types.inferScalarType(getOperand(0));

  if (!ScalarCond)

    CondTy = VectorType::get(CondTy, VF);


  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;

  if (auto *Cmp = dyn_cast<CmpInst>(SI->getCondition()))

    Pred = Cmp->getPredicate();

  return Ctx.TTI.getCmpSelInstrCost(

      Instruction::Select, VectorTy, CondTy, Pred, Ctx.CostKind,

      {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, SI);

}


VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(

    const FastMathFlags &FMF) {

  AllowReassoc = FMF.allowReassoc();

  NoNaNs = FMF.noNaNs();

  NoInfs = FMF.noInfs();

  NoSignedZeros = FMF.noSignedZeros();

  AllowReciprocal = FMF.allowReciprocal();

  AllowContract = FMF.allowContract();

  ApproxFunc = FMF.approxFunc();

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {

  switch (OpType) {

  case OperationType::Cmp:

    O << " " << CmpInst::getPredicateName(getPredicate());

    break;

  case OperationType::DisjointOp:

    if (DisjointFlags.IsDisjoint)

      O << " disjoint";

    break;

  case OperationType::PossiblyExactOp:

    if (ExactFlags.IsExact)

      O << " exact";

    break;

  case OperationType::OverflowingBinOp:

    if (WrapFlags.HasNUW)

      O << " nuw";

    if (WrapFlags.HasNSW)

      O << " nsw";

    break;

  case OperationType::FPMathOp:

    getFastMathFlags().print(O);

    break;

  case OperationType::GEPOp:

    if (GEPFlags.isInBounds())

      O << " inbounds";

    else if (GEPFlags.hasNoUnsignedSignedWrap())

      O << " nusw";

    if (GEPFlags.hasNoUnsignedWrap())

      O << " nuw";

    break;

  case OperationType::NonNegOp:

    if (NonNegFlags.NonNeg)

      O << " nneg";

    break;

  case OperationType::Other:

    break;

  }

  if (getNumOperands() > 0)

    O << " ";

}

#endif


void VPWidenRecipe::execute(VPTransformState &State) {

  State.setDebugLocFrom(getDebugLoc());

  auto &Builder = State.Builder;

  switch (Opcode) {

  case Instruction::Call:

  case Instruction::Br:

  case Instruction::PHI:

  case Instruction::GetElementPtr:

  case Instruction::Select:

    llvm_unreachable("This instruction is handled by a different recipe.");

  case Instruction::UDiv:

  case Instruction::SDiv:

  case Instruction::SRem:

  case Instruction::URem:

  case Instruction::Add:

  case Instruction::FAdd:

  case Instruction::Sub:

  case Instruction::FSub:

  case Instruction::FNeg:

  case Instruction::Mul:

  case Instruction::FMul:

  case Instruction::FDiv:

  case Instruction::FRem:

  case Instruction::Shl:

  case Instruction::LShr:

  case Instruction::AShr:

  case Instruction::And:

  case Instruction::Or:

  case Instruction::Xor: {

    // Just widen unops and binops.

    SmallVector<Value *, 2> Ops;

    for (VPValue *VPOp : operands())

      Ops.push_back(State.get(VPOp));


    Value *V = Builder.CreateNAryOp(Opcode, Ops);


    if (auto *VecOp = dyn_cast<Instruction>(V))

      setFlags(VecOp);


    // Use this vector value for all users of the original instruction.

    State.set(this, V);

    State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));

    break;

  }

  case Instruction::Freeze: {

    Value *Op = State.get(getOperand(0));


    Value *Freeze = Builder.CreateFreeze(Op);

    State.set(this, Freeze);

    break;

  }

  case Instruction::ICmp:

  case Instruction::FCmp: {

    // Widen compares. Generate vector compares.

    bool FCmp = Opcode == Instruction::FCmp;

    Value *A = State.get(getOperand(0));

    Value *B = State.get(getOperand(1));

    Value *C = nullptr;

    if (FCmp) {

      // Propagate fast math flags.

      C = Builder.CreateFCmpFMF(

          getPredicate(), A, B,

          dyn_cast_or_null<Instruction>(getUnderlyingValue()));

    } else {

      C = Builder.CreateICmp(getPredicate(), A, B);

    }

    State.set(this, C);

    State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));

    break;

  }

  default:

    // This instruction is not vectorized by simple widening.

    LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "

                      << Instruction::getOpcodeName(Opcode));

    llvm_unreachable("Unhandled instruction!");

  } // end of switch.


#if !defined(NDEBUG)

  // Verify that VPlan type inference results agree with the type of the

  // generated values.

  assert(VectorType::get(State.TypeAnalysis.inferScalarType(this), State.VF) ==

             State.get(this)->getType() &&

         "inferred type and type from generated instructions do not match");

#endif

}


InstructionCost VPWidenRecipe::computeCost(ElementCount VF,

                                           VPCostContext &Ctx) const {

  switch (Opcode) {

  case Instruction::FNeg: {

    Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);

    return Ctx.TTI.getArithmeticInstrCost(

        Opcode, VectorTy, Ctx.CostKind,

        {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},

        {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None});

  }


  case Instruction::UDiv:

  case Instruction::SDiv:

  case Instruction::SRem:

  case Instruction::URem:

    // More complex computation, let the legacy cost-model handle this for now.

    return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);

  case Instruction::Add:

  case Instruction::FAdd:

  case Instruction::Sub:

  case Instruction::FSub:

  case Instruction::Mul:

  case Instruction::FMul:

  case Instruction::FDiv:

  case Instruction::FRem:

  case Instruction::Shl:

  case Instruction::LShr:

  case Instruction::AShr:

  case Instruction::And:

  case Instruction::Or:

  case Instruction::Xor: {

    VPValue *RHS = getOperand(1);

    // Certain instructions can be cheaper to vectorize if they have a constant

    // second vector operand. One example of this are shifts on x86.

    TargetTransformInfo::OperandValueInfo RHSInfo = {

        TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None};

    if (RHS->isLiveIn())

      RHSInfo = Ctx.TTI.getOperandInfo(RHS->getLiveInIRValue());


    if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&

        getOperand(1)->isDefinedOutsideLoopRegions())

      RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;

    Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);

    Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());


    SmallVector<const Value *, 4> Operands;

    if (CtxI)

      Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());

    return Ctx.TTI.getArithmeticInstrCost(

        Opcode, VectorTy, Ctx.CostKind,

        {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},

        RHSInfo, Operands, CtxI, &Ctx.TLI);

  }

  case Instruction::Freeze: {

    // This opcode is unknown. Assume that it is the same as 'mul'.

    Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);

    return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy,

                                          Ctx.CostKind);

  }

  case Instruction::ICmp:

  case Instruction::FCmp: {

    Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());

    Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);

    return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(),

                                      Ctx.CostKind,

                                      {TTI::OK_AnyValue, TTI::OP_None},

                                      {TTI::OK_AnyValue, TTI::OP_None}, CtxI);

  }

  default:

    llvm_unreachable("Unsupported opcode for instruction");

  }

}


void VPWidenEVLRecipe::execute(VPTransformState &State) {

  unsigned Opcode = getOpcode();

  // TODO: Support other opcodes

  if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))

    llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");


  State.setDebugLocFrom(getDebugLoc());


  assert(State.get(getOperand(0))->getType()->isVectorTy() &&

         "VPWidenEVLRecipe should not be used for scalars");


  VPValue *EVL = getEVL();

  Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);

  IRBuilderBase &BuilderIR = State.Builder;

  VectorBuilder Builder(BuilderIR);

  Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());


  SmallVector<Value *, 4> Ops;

  for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {

    VPValue *VPOp = getOperand(I);

    Ops.push_back(State.get(VPOp));

  }


  Builder.setMask(Mask).setEVL(EVLArg);

  Value *VPInst =

      Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");

  // Currently vp-intrinsics only accept FMF flags.

  // TODO: Enable other flags when support is added.

  if (isa<FPMathOperator>(VPInst))

    setFlags(cast<Instruction>(VPInst));


  State.set(this, VPInst);

  State.addMetadata(VPInst,

                    dyn_cast_or_null<Instruction>(getUnderlyingValue()));

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,

                          VPSlotTracker &SlotTracker) const {

  O << Indent << "WIDEN ";

  printAsOperand(O, SlotTracker);

  O << " = " << Instruction::getOpcodeName(Opcode);

  printFlags(O);

  printOperands(O, SlotTracker);

}


void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,

                             VPSlotTracker &SlotTracker) const {

  O << Indent << "WIDEN ";

  printAsOperand(O, SlotTracker);

  O << " = vp." << Instruction::getOpcodeName(getOpcode());

  printFlags(O);

  printOperands(O, SlotTracker);

}

#endif


void VPWidenCastRecipe::execute(VPTransformState &State) {

  State.setDebugLocFrom(getDebugLoc());

  auto &Builder = State.Builder;

  /// Vectorize casts.

  assert(State.VF.isVector() && "Not vectorizing?");

  Type *DestTy = VectorType::get(getResultType(), State.VF);

  VPValue *Op = getOperand(0);

  Value *A = State.get(Op);

  Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);

  State.set(this, Cast);

  State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));

  if (auto *CastOp = dyn_cast<Instruction>(Cast))

    setFlags(CastOp);

}


InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,

                                               VPCostContext &Ctx) const {

  // TODO: In some cases, VPWidenCastRecipes are created but not considered in

  // the legacy cost model, including truncates/extends when evaluating a

  // reduction in a smaller type.

  if (!getUnderlyingValue())

    return 0;

  // Computes the CastContextHint from a recipes that may access memory.

  auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {

    if (VF.isScalar())

      return TTI::CastContextHint::Normal;

    if (isa<VPInterleaveRecipe>(R))

      return TTI::CastContextHint::Interleave;

    if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R))

      return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked

                                             : TTI::CastContextHint::Normal;

    const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);

    if (WidenMemoryRecipe == nullptr)

      return TTI::CastContextHint::None;

    if (!WidenMemoryRecipe->isConsecutive())

      return TTI::CastContextHint::GatherScatter;

    if (WidenMemoryRecipe->isReverse())

      return TTI::CastContextHint::Reversed;

    if (WidenMemoryRecipe->isMasked())

      return TTI::CastContextHint::Masked;

    return TTI::CastContextHint::Normal;

  };


  VPValue *Operand = getOperand(0);

  TTI::CastContextHint CCH = TTI::CastContextHint::None;

  // For Trunc/FPTrunc, get the context from the only user.

  if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&

      !hasMoreThanOneUniqueUser() && getNumUsers() > 0) {

    if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin()))

      CCH = ComputeCCH(StoreRecipe);

  }

  // For Z/Sext, get the context from the operand.

  else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||

           Opcode == Instruction::FPExt) {

    if (Operand->isLiveIn())

      CCH = TTI::CastContextHint::Normal;

    else if (Operand->getDefiningRecipe())

      CCH = ComputeCCH(Operand->getDefiningRecipe());

  }


  auto *SrcTy =

      cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(Operand), VF));

  auto *DestTy = cast<VectorType>(toVectorTy(getResultType(), VF));

  // Arm TTI will use the underlying instruction to determine the cost.

  return Ctx.TTI.getCastInstrCost(

      Opcode, DestTy, SrcTy, CCH, Ctx.CostKind,

      dyn_cast_if_present<Instruction>(getUnderlyingValue()));

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,

                              VPSlotTracker &SlotTracker) const {

  O << Indent << "WIDEN-CAST ";

  printAsOperand(O, SlotTracker);

  O << " = " << Instruction::getOpcodeName(Opcode);

  printFlags(O);

  printOperands(O, SlotTracker);

  O << " to " << *getResultType();

}

#endif


InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,

                                               VPCostContext &Ctx) const {

  return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);

}


/// This function adds

/// (0 * Step, 1 * Step, 2 * Step, ...)

/// to each vector element of Val.

/// \p Opcode is relevant for FP induction variable.

static Value *getStepVector(Value *Val, Value *Step,

                            Instruction::BinaryOps BinOp, ElementCount VF,

                            IRBuilderBase &Builder) {

  assert(VF.isVector() && "only vector VFs are supported");


  // Create and check the types.

  auto *ValVTy = cast<VectorType>(Val->getType());

  ElementCount VLen = ValVTy->getElementCount();


  Type *STy = Val->getType()->getScalarType();

  assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&

         "Induction Step must be an integer or FP");

  assert(Step->getType() == STy && "Step has wrong type");


  SmallVector<Constant *, 8> Indices;


  // Create a vector of consecutive numbers from zero to VF.

  VectorType *InitVecValVTy = ValVTy;

  if (STy->isFloatingPointTy()) {

    Type *InitVecValSTy =

        IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());

    InitVecValVTy = VectorType::get(InitVecValSTy, VLen);

  }

  Value *InitVec = Builder.CreateStepVector(InitVecValVTy);


  if (STy->isIntegerTy()) {

    Step = Builder.CreateVectorSplat(VLen, Step);

    assert(Step->getType() == Val->getType() && "Invalid step vec");

    // FIXME: The newly created binary instructions should contain nsw/nuw

    // flags, which can be found from the original scalar operations.

    Step = Builder.CreateMul(InitVec, Step);

    return Builder.CreateAdd(Val, Step, "induction");

  }


  // Floating point induction.

  assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&

         "Binary Opcode should be specified for FP induction");

  InitVec = Builder.CreateUIToFP(InitVec, ValVTy);


  Step = Builder.CreateVectorSplat(VLen, Step);

  Value *MulOp = Builder.CreateFMul(InitVec, Step);

  return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");

}


/// A helper function that returns an integer or floating-point constant with

/// value C.

static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {

  return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)

                           : ConstantFP::get(Ty, C);

}


void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {

  assert(!State.Lane && "Int or FP induction being replicated.");


  Value *Start = getStartValue()->getLiveInIRValue();

  const InductionDescriptor &ID = getInductionDescriptor();

  TruncInst *Trunc = getTruncInst();

  IRBuilderBase &Builder = State.Builder;

  assert(getPHINode()->getType() == ID.getStartValue()->getType() &&

         "Types must match");

  assert(State.VF.isVector() && "must have vector VF");


  // The value from the original loop to which we are mapping the new induction

  // variable.

  Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode();


  // Fast-math-flags propagate from the original induction instruction.

  IRBuilder<>::FastMathFlagGuard FMFG(Builder);

  if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))

    Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());


  // Now do the actual transformations, and start with fetching the step value.

  Value *Step = State.get(getStepValue(), VPLane(0));


  assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&

         "Expected either an induction phi-node or a truncate of it!");


  // Construct the initial value of the vector IV in the vector loop preheader

  auto CurrIP = Builder.saveIP();

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  Builder.SetInsertPoint(VectorPH->getTerminator());

  if (isa<TruncInst>(EntryVal)) {

    assert(Start->getType()->isIntegerTy() &&

           "Truncation requires an integer type");

    auto *TruncType = cast<IntegerType>(EntryVal->getType());

    Step = Builder.CreateTrunc(Step, TruncType);

    Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);

  }


  Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);

  Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),

                                      State.VF, State.Builder);


  // We create vector phi nodes for both integer and floating-point induction

  // variables. Here, we determine the kind of arithmetic we will perform.

  Instruction::BinaryOps AddOp;

  Instruction::BinaryOps MulOp;

  if (Step->getType()->isIntegerTy()) {

    AddOp = Instruction::Add;

    MulOp = Instruction::Mul;

  } else {

    AddOp = ID.getInductionOpcode();

    MulOp = Instruction::FMul;

  }


  Value *SplatVF;

  if (VPValue *SplatVFOperand = getSplatVFValue()) {

    // The recipe has been unrolled. In that case, fetch the splat value for the

    // induction increment.

    SplatVF = State.get(SplatVFOperand);

  } else {

    // Multiply the vectorization factor by the step using integer or

    // floating-point arithmetic as appropriate.

    Type *StepType = Step->getType();

    Value *RuntimeVF = State.get(getVFValue(), VPLane(0));

    if (Step->getType()->isFloatingPointTy())

      RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType);

    else

      RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType);

    Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);


    // Create a vector splat to use in the induction update.

    SplatVF = Builder.CreateVectorSplat(State.VF, Mul);

  }


  Builder.restoreIP(CurrIP);


  // We may need to add the step a number of times, depending on the unroll

  // factor. The last of those goes into the PHI.

  PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");

  VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());

  VecInd->setDebugLoc(getDebugLoc());

  State.set(this, VecInd);


  Instruction *LastInduction = cast<Instruction>(

      Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next"));

  if (isa<TruncInst>(EntryVal))

    State.addMetadata(LastInduction, EntryVal);

  LastInduction->setDebugLoc(getDebugLoc());


  VecInd->addIncoming(SteppedStart, VectorPH);

  // Add induction update using an incorrect block temporarily. The phi node

  // will be fixed after VPlan execution. Note that at this point the latch

  // block cannot be used, as it does not exist yet.

  // TODO: Model increment value in VPlan, by turning the recipe into a

  // multi-def and a subclass of VPHeaderPHIRecipe.

  VecInd->addIncoming(LastInduction, VectorPH);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,

                                          VPSlotTracker &SlotTracker) const {

  O << Indent;

  printAsOperand(O, SlotTracker);

  O << " = WIDEN-INDUCTION  ";

  printOperands(O, SlotTracker);


  if (auto *TI = getTruncInst())

    O << " (truncated to " << *TI->getType() << ")";

}

#endif


bool VPWidenIntOrFpInductionRecipe::isCanonical() const {

  // The step may be defined by a recipe in the preheader (e.g. if it requires

  // SCEV expansion), but for the canonical induction the step is required to be

  // 1, which is represented as live-in.

  if (getStepValue()->getDefiningRecipe())

    return false;

  auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());

  auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());

  auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());

  return StartC && StartC->isZero() && StepC && StepC->isOne() &&

         getScalarType() == CanIV->getScalarType();

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,

                              VPSlotTracker &SlotTracker) const {

  O << Indent;

  printAsOperand(O, SlotTracker);

  O << " = DERIVED-IV ";

  getStartValue()->printAsOperand(O, SlotTracker);

  O << " + ";

  getOperand(1)->printAsOperand(O, SlotTracker);

  O << " * ";

  getStepValue()->printAsOperand(O, SlotTracker);

}

#endif


void VPScalarIVStepsRecipe::execute(VPTransformState &State) {

  // Fast-math-flags propagate from the original induction instruction.

  IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);

  if (hasFastMathFlags())

    State.Builder.setFastMathFlags(getFastMathFlags());


  /// Compute scalar induction steps. \p ScalarIV is the scalar induction

  /// variable on which to base the steps, \p Step is the size of the step.


  Value *BaseIV = State.get(getOperand(0), VPLane(0));

  Value *Step = State.get(getStepValue(), VPLane(0));

  IRBuilderBase &Builder = State.Builder;


  // Ensure step has the same type as that of scalar IV.

  Type *BaseIVTy = BaseIV->getType()->getScalarType();

  assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");


  // We build scalar steps for both integer and floating-point induction

  // variables. Here, we determine the kind of arithmetic we will perform.

  Instruction::BinaryOps AddOp;

  Instruction::BinaryOps MulOp;

  if (BaseIVTy->isIntegerTy()) {

    AddOp = Instruction::Add;

    MulOp = Instruction::Mul;

  } else {

    AddOp = InductionOpcode;

    MulOp = Instruction::FMul;

  }


  // Determine the number of scalars we need to generate for each unroll

  // iteration.

  bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);

  // Compute the scalar steps and save the results in State.

  Type *IntStepTy =

      IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());

  Type *VecIVTy = nullptr;

  Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;

  if (!FirstLaneOnly && State.VF.isScalable()) {

    VecIVTy = VectorType::get(BaseIVTy, State.VF);

    UnitStepVec =

        Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));

    SplatStep = Builder.CreateVectorSplat(State.VF, Step);

    SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);

  }


  unsigned StartLane = 0;

  unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();

  if (State.Lane) {

    StartLane = State.Lane->getKnownLane();

    EndLane = StartLane + 1;

  }

  Value *StartIdx0 =

      createStepForVF(Builder, IntStepTy, State.VF, getUnrollPart(*this));


  if (!FirstLaneOnly && State.VF.isScalable()) {

    auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);

    auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);

    if (BaseIVTy->isFloatingPointTy())

      InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);

    auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);

    auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);

    State.set(this, Add);

    // It's useful to record the lane values too for the known minimum number

    // of elements so we do those below. This improves the code quality when

    // trying to extract the first element, for example.

  }


  if (BaseIVTy->isFloatingPointTy())

    StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);


  for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {

    Value *StartIdx = Builder.CreateBinOp(

        AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));

    // The step returned by `createStepForVF` is a runtime-evaluated value

    // when VF is scalable. Otherwise, it should be folded into a Constant.

    assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&

           "Expected StartIdx to be folded to a constant when VF is not "

           "scalable");

    auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);

    auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);

    State.set(this, Add, VPLane(Lane));

  }

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,

                                  VPSlotTracker &SlotTracker) const {

  O << Indent;

  printAsOperand(O, SlotTracker);

  O << " = SCALAR-STEPS ";

  printOperands(O, SlotTracker);

}

#endif


void VPWidenGEPRecipe::execute(VPTransformState &State) {

  assert(State.VF.isVector() && "not widening");

  auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());

  // Construct a vector GEP by widening the operands of the scalar GEP as

  // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP

  // results in a vector of pointers when at least one operand of the GEP

  // is vector-typed. Thus, to keep the representation compact, we only use

  // vector-typed operands for loop-varying values.


  if (areAllOperandsInvariant()) {

    // If we are vectorizing, but the GEP has only loop-invariant operands,

    // the GEP we build (by only using vector-typed operands for

    // loop-varying values) would be a scalar pointer. Thus, to ensure we

    // produce a vector of pointers, we need to either arbitrarily pick an

    // operand to broadcast, or broadcast a clone of the original GEP.

    // Here, we broadcast a clone of the original.

    //

    // TODO: If at some point we decide to scalarize instructions having

    //       loop-invariant operands, this special case will no longer be

    //       required. We would add the scalarization decision to

    //       collectLoopScalars() and teach getVectorValue() to broadcast

    //       the lane-zero scalar value.

    SmallVector<Value *> Ops;

    for (unsigned I = 0, E = getNumOperands(); I != E; I++)

      Ops.push_back(State.get(getOperand(I), VPLane(0)));


    auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],

                                           ArrayRef(Ops).drop_front(), "",

                                           getGEPNoWrapFlags());

    Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);

    State.set(this, Splat);

    State.addMetadata(Splat, GEP);

  } else {

    // If the GEP has at least one loop-varying operand, we are sure to

    // produce a vector of pointers unless VF is scalar.

    // The pointer operand of the new GEP. If it's loop-invariant, we

    // won't broadcast it.

    auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0))

                                         : State.get(getOperand(0));


    // Collect all the indices for the new GEP. If any index is

    // loop-invariant, we won't broadcast it.

    SmallVector<Value *, 4> Indices;

    for (unsigned I = 1, E = getNumOperands(); I < E; I++) {

      VPValue *Operand = getOperand(I);

      if (isIndexLoopInvariant(I - 1))

        Indices.push_back(State.get(Operand, VPLane(0)));

      else

        Indices.push_back(State.get(Operand));

    }


    // Create the new GEP. Note that this GEP may be a scalar if VF == 1,

    // but it should be a vector, otherwise.

    auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,

                                           Indices, "", getGEPNoWrapFlags());

    assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&

           "NewGEP is not a pointer vector");

    State.set(this, NewGEP);

    State.addMetadata(NewGEP, GEP);

  }

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,

                             VPSlotTracker &SlotTracker) const {

  O << Indent << "WIDEN-GEP ";

  O << (isPointerLoopInvariant() ? "Inv" : "Var");

  for (size_t I = 0; I < getNumOperands() - 1; ++I)

    O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";


  O << " ";

  printAsOperand(O, SlotTracker);

  O << " = getelementptr";

  printFlags(O);

  printOperands(O, SlotTracker);

}

#endif


static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,

                           unsigned CurrentPart, IRBuilderBase &Builder) {

  // Use i32 for the gep index type when the value is constant,

  // or query DataLayout for a more suitable index type otherwise.

  const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();

  return IsScalable && (IsReverse || CurrentPart > 0)

             ? DL.getIndexType(Builder.getPtrTy(0))

             : Builder.getInt32Ty();

}


void VPReverseVectorPointerRecipe::execute(VPTransformState &State) {

  auto &Builder = State.Builder;

  State.setDebugLocFrom(getDebugLoc());

  unsigned CurrentPart = getUnrollPart(*this);

  Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,

                                CurrentPart, Builder);


  // The wide store needs to start at the last vector element.

  Value *RunTimeVF = State.get(getVFValue(), VPLane(0));

  if (IndexTy != RunTimeVF->getType())

    RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);

  // NumElt = -CurrentPart * RunTimeVF

  Value *NumElt = Builder.CreateMul(

      ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);

  // LastLane = 1 - RunTimeVF

  Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);

  Value *Ptr = State.get(getOperand(0), VPLane(0));

  Value *ResultPtr =

      Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());

  ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",

                                getGEPNoWrapFlags());


  State.set(this, ResultPtr, /*IsScalar*/ true);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,

                                         VPSlotTracker &SlotTracker) const {

  O << Indent;

  printAsOperand(O, SlotTracker);

  O << " = reverse-vector-pointer";

  printFlags(O);

  printOperands(O, SlotTracker);

}

#endif


void VPVectorPointerRecipe::execute(VPTransformState &State) {

  auto &Builder = State.Builder;

  State.setDebugLocFrom(getDebugLoc());

  unsigned CurrentPart = getUnrollPart(*this);

  Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,

                                CurrentPart, Builder);

  Value *Ptr = State.get(getOperand(0), VPLane(0));


  Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);

  Value *ResultPtr =

      Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags());


  State.set(this, ResultPtr, /*IsScalar*/ true);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,

                                  VPSlotTracker &SlotTracker) const {

  O << Indent;

  printAsOperand(O, SlotTracker);

  O << " = vector-pointer ";


  printOperands(O, SlotTracker);

}

#endif


void VPBlendRecipe::execute(VPTransformState &State) {

  assert(isNormalized() && "Expected blend to be normalized!");

  State.setDebugLocFrom(getDebugLoc());

  // We know that all PHIs in non-header blocks are converted into

  // selects, so we don't have to worry about the insertion order and we

  // can just use the builder.

  // At this point we generate the predication tree. There may be

  // duplications since this is a simple recursive scan, but future

  // optimizations will clean it up.


  unsigned NumIncoming = getNumIncomingValues();


  // Generate a sequence of selects of the form:

  // SELECT(Mask3, In3,

  //        SELECT(Mask2, In2,

  //               SELECT(Mask1, In1,

  //                      In0)))

  // Note that Mask0 is never used: lanes for which no path reaches this phi and

  // are essentially undef are taken from In0.

  bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);

  Value *Result = nullptr;

  for (unsigned In = 0; In < NumIncoming; ++In) {

    // We might have single edge PHIs (blocks) - use an identity

    // 'select' for the first PHI operand.

    Value *In0 = State.get(getIncomingValue(In), OnlyFirstLaneUsed);

    if (In == 0)

      Result = In0; // Initialize with the first incoming value.

    else {

      // Select between the current value and the previous incoming edge

      // based on the incoming mask.

      Value *Cond = State.get(getMask(In), OnlyFirstLaneUsed);

      Result = State.Builder.CreateSelect(Cond, In0, Result, "predphi");

    }

  }

  State.set(this, Result, OnlyFirstLaneUsed);

}


InstructionCost VPBlendRecipe::computeCost(ElementCount VF,

                                           VPCostContext &Ctx) const {

  // Handle cases where only the first lane is used the same way as the legacy

  // cost model.

  if (vputils::onlyFirstLaneUsed(this))

    return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);


  Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);

  Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);

  return (getNumIncomingValues() - 1) *

         Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,

                                    CmpInst::BAD_ICMP_PREDICATE, Ctx.CostKind);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,

                          VPSlotTracker &SlotTracker) const {

  O << Indent << "BLEND ";

  printAsOperand(O, SlotTracker);

  O << " =";

  if (getNumIncomingValues() == 1) {

    // Not a User of any mask: not really blending, this is a

    // single-predecessor phi.

    O << " ";

    getIncomingValue(0)->printAsOperand(O, SlotTracker);

  } else {

    for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {

      O << " ";

      getIncomingValue(I)->printAsOperand(O, SlotTracker);

      if (I == 0)

        continue;

      O << "/";

      getMask(I)->printAsOperand(O, SlotTracker);

    }

  }

}

#endif


void VPReductionRecipe::execute(VPTransformState &State) {

  assert(!State.Lane && "Reduction being replicated.");

  Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);

  RecurKind Kind = RdxDesc.getRecurrenceKind();

  // Propagate the fast-math flags carried by the underlying instruction.

  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);

  State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());

  State.setDebugLocFrom(getDebugLoc());

  Value *NewVecOp = State.get(getVecOp());

  if (VPValue *Cond = getCondOp()) {

    Value *NewCond = State.get(Cond, State.VF.isScalar());

    VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());

    Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();


    Value *Start;

    if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind))

      Start = RdxDesc.getRecurrenceStartValue();

    else

      Start = llvm::getRecurrenceIdentity(Kind, ElementTy,

                                          RdxDesc.getFastMathFlags());

    if (State.VF.isVector())

      Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);


    Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);

    NewVecOp = Select;

  }

  Value *NewRed;

  Value *NextInChain;

  if (IsOrdered) {

    if (State.VF.isVector())

      NewRed =

          createOrderedReduction(State.Builder, RdxDesc, NewVecOp, PrevInChain);

    else

      NewRed = State.Builder.CreateBinOp(

          (Instruction::BinaryOps)RdxDesc.getOpcode(), PrevInChain, NewVecOp);

    PrevInChain = NewRed;

    NextInChain = NewRed;

  } else {

    PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);

    NewRed = createReduction(State.Builder, RdxDesc, NewVecOp);

    if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))

      NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),

                                   NewRed, PrevInChain);

    else

      NextInChain = State.Builder.CreateBinOp(

          (Instruction::BinaryOps)RdxDesc.getOpcode(), NewRed, PrevInChain);

  }

  State.set(this, NextInChain, /*IsScalar*/ true);

}


void VPReductionEVLRecipe::execute(VPTransformState &State) {

  assert(!State.Lane && "Reduction being replicated.");


  auto &Builder = State.Builder;

  // Propagate the fast-math flags carried by the underlying instruction.

  IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);

  const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor();

  Builder.setFastMathFlags(RdxDesc.getFastMathFlags());


  RecurKind Kind = RdxDesc.getRecurrenceKind();

  Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);

  Value *VecOp = State.get(getVecOp());

  Value *EVL = State.get(getEVL(), VPLane(0));


  VectorBuilder VBuilder(Builder);

  VBuilder.setEVL(EVL);

  Value *Mask;

  // TODO: move the all-true mask generation into VectorBuilder.

  if (VPValue *CondOp = getCondOp())

    Mask = State.get(CondOp);

  else

    Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());

  VBuilder.setMask(Mask);


  Value *NewRed;

  if (isOrdered()) {

    NewRed = createOrderedReduction(VBuilder, RdxDesc, VecOp, Prev);

  } else {

    NewRed = createSimpleReduction(VBuilder, VecOp, RdxDesc);

    if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))

      NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);

    else

      NewRed = Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(),

                                   NewRed, Prev);

  }

  State.set(this, NewRed, /*IsScalar*/ true);

}


InstructionCost VPReductionRecipe::computeCost(ElementCount VF,

                                               VPCostContext &Ctx) const {

  RecurKind RdxKind = RdxDesc.getRecurrenceKind();

  Type *ElementTy = Ctx.Types.inferScalarType(this);

  auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));

  unsigned Opcode = RdxDesc.getOpcode();


  // TODO: Support any-of and in-loop reductions.

  assert(

      (!RecurrenceDescriptor::isAnyOfRecurrenceKind(RdxKind) ||

       ForceTargetInstructionCost.getNumOccurrences() > 0) &&

      "Any-of reduction not implemented in VPlan-based cost model currently.");

  assert(

      (!cast<VPReductionPHIRecipe>(getOperand(0))->isInLoop() ||

       ForceTargetInstructionCost.getNumOccurrences() > 0) &&

      "In-loop reduction not implemented in VPlan-based cost model currently.");


  assert(ElementTy->getTypeID() == RdxDesc.getRecurrenceType()->getTypeID() &&

         "Inferred type and recurrence type mismatch.");


  // Cost = Reduction cost + BinOp cost

  InstructionCost Cost =

      Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, Ctx.CostKind);

  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind)) {

    Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind);

    return Cost + Ctx.TTI.getMinMaxReductionCost(

                      Id, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind);

  }


  return Cost + Ctx.TTI.getArithmeticReductionCost(

                    Opcode, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,

                              VPSlotTracker &SlotTracker) const {

  O << Indent << "REDUCE ";

  printAsOperand(O, SlotTracker);

  O << " = ";

  getChainOp()->printAsOperand(O, SlotTracker);

  O << " +";

  if (isa<FPMathOperator>(getUnderlyingInstr()))

    O << getUnderlyingInstr()->getFastMathFlags();

  O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";

  getVecOp()->printAsOperand(O, SlotTracker);

  if (isConditional()) {

    O << ", ";

    getCondOp()->printAsOperand(O, SlotTracker);

  }

  O << ")";

  if (RdxDesc.IntermediateStore)

    O << " (with final reduction value stored in invariant address sank "

         "outside of loop)";

}


void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent,

                                 VPSlotTracker &SlotTracker) const {

  const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor();

  O << Indent << "REDUCE ";

  printAsOperand(O, SlotTracker);

  O << " = ";

  getChainOp()->printAsOperand(O, SlotTracker);

  O << " +";

  if (isa<FPMathOperator>(getUnderlyingInstr()))

    O << getUnderlyingInstr()->getFastMathFlags();

  O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";

  getVecOp()->printAsOperand(O, SlotTracker);

  O << ", ";

  getEVL()->printAsOperand(O, SlotTracker);

  if (isConditional()) {

    O << ", ";

    getCondOp()->printAsOperand(O, SlotTracker);

  }

  O << ")";

  if (RdxDesc.IntermediateStore)

    O << " (with final reduction value stored in invariant address sank "

         "outside of loop)";

}

#endif


bool VPReplicateRecipe::shouldPack() const {

  // Find if the recipe is used by a widened recipe via an intervening

  // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.

  return any_of(users(), [](const VPUser *U) {

    if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))

      return any_of(PredR->users(), [PredR](const VPUser *U) {

        return !U->usesScalars(PredR);

      });

    return false;

  });

}


InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,

                                               VPCostContext &Ctx) const {

  Instruction *UI = cast<Instruction>(getUnderlyingValue());

  // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan

  // transform, avoid computing their cost multiple times for now.

  Ctx.SkipCostComputation.insert(UI);

  return Ctx.getLegacyCost(UI, VF);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,

                              VPSlotTracker &SlotTracker) const {

  O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");


  if (!getUnderlyingInstr()->getType()->isVoidTy()) {

    printAsOperand(O, SlotTracker);

    O << " = ";

  }

  if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {

    O << "call";

    printFlags(O);

    O << "@" << CB->getCalledFunction()->getName() << "(";

    interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),

                    O, [&O, &SlotTracker](VPValue *Op) {

                      Op->printAsOperand(O, SlotTracker);

                    });

    O << ")";

  } else {

    O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode());

    printFlags(O);

    printOperands(O, SlotTracker);

  }


  if (shouldPack())

    O << " (S->V)";

}

#endif


Value *VPScalarCastRecipe ::generate(VPTransformState &State) {

  State.setDebugLocFrom(getDebugLoc());

  assert(vputils::onlyFirstLaneUsed(this) &&

         "Codegen only implemented for first lane.");

  switch (Opcode) {

  case Instruction::SExt:

  case Instruction::ZExt:

  case Instruction::Trunc: {

    // Note: SExt/ZExt not used yet.

    Value *Op = State.get(getOperand(0), VPLane(0));

    return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);

  }

  default:

    llvm_unreachable("opcode not implemented yet");

  }

}


void VPScalarCastRecipe ::execute(VPTransformState &State) {

  State.set(this, generate(State), VPLane(0));

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,

                                VPSlotTracker &SlotTracker) const {

  O << Indent << "SCALAR-CAST ";

  printAsOperand(O, SlotTracker);

  O << " = " << Instruction::getOpcodeName(Opcode) << " ";

  printOperands(O, SlotTracker);

  O << " to " << *ResultTy;

}

#endif


void VPBranchOnMaskRecipe::execute(VPTransformState &State) {

  assert(State.Lane && "Branch on Mask works only on single instance.");


  Value *ConditionBit = nullptr;

  VPValue *BlockInMask = getMask();

  if (BlockInMask)

    ConditionBit = State.get(BlockInMask, *State.Lane);

  else // Block in mask is all-one.

    ConditionBit = State.Builder.getTrue();


  // Replace the temporary unreachable terminator with a new conditional branch,

  // whose two destinations will be set later when they are created.

  auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();

  assert(isa<UnreachableInst>(CurrentTerminator) &&

         "Expected to replace unreachable terminator with conditional branch.");

  auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);

  CondBr->setSuccessor(0, nullptr);

  ReplaceInstWithInst(CurrentTerminator, CondBr);

}


InstructionCost VPBranchOnMaskRecipe::computeCost(ElementCount VF,

                                                  VPCostContext &Ctx) const {

  // The legacy cost model doesn't assign costs to branches for individual

  // replicate regions. Match the current behavior in the VPlan cost model for

  // now.

  return 0;

}


void VPPredInstPHIRecipe::execute(VPTransformState &State) {

  State.setDebugLocFrom(getDebugLoc());

  assert(State.Lane && "Predicated instruction PHI works per instance.");

  Instruction *ScalarPredInst =

      cast<Instruction>(State.get(getOperand(0), *State.Lane));

  BasicBlock *PredicatedBB = ScalarPredInst->getParent();

  BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();

  assert(PredicatingBB && "Predicated block has no single predecessor.");

  assert(isa<VPReplicateRecipe>(getOperand(0)) &&

         "operand must be VPReplicateRecipe");


  // By current pack/unpack logic we need to generate only a single phi node: if

  // a vector value for the predicated instruction exists at this point it means

  // the instruction has vector users only, and a phi for the vector value is

  // needed. In this case the recipe of the predicated instruction is marked to

  // also do that packing, thereby "hoisting" the insert-element sequence.

  // Otherwise, a phi node for the scalar value is needed.

  if (State.hasVectorValue(getOperand(0))) {

    Value *VectorValue = State.get(getOperand(0));

    InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);

    PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);

    VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.

    VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.

    if (State.hasVectorValue(this))

      State.reset(this, VPhi);

    else

      State.set(this, VPhi);

    // NOTE: Currently we need to update the value of the operand, so the next

    // predicated iteration inserts its generated value in the correct vector.

    State.reset(getOperand(0), VPhi);

  } else {

    if (vputils::onlyFirstLaneUsed(this) && !State.Lane->isFirstLane())

      return;


    Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();

    PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);

    Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),

                     PredicatingBB);

    Phi->addIncoming(ScalarPredInst, PredicatedBB);

    if (State.hasScalarValue(this, *State.Lane))

      State.reset(this, Phi, *State.Lane);

    else

      State.set(this, Phi, *State.Lane);

    // NOTE: Currently we need to update the value of the operand, so the next

    // predicated iteration inserts its generated value in the correct vector.

    State.reset(getOperand(0), Phi, *State.Lane);

  }

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,

                                VPSlotTracker &SlotTracker) const {

  O << Indent << "PHI-PREDICATED-INSTRUCTION ";

  printAsOperand(O, SlotTracker);

  O << " = ";

  printOperands(O, SlotTracker);

}

#endif


InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,

                                                 VPCostContext &Ctx) const {

  Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);

  const Align Alignment =

      getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));

  unsigned AS =

      getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));


  if (!Consecutive) {

    // TODO: Using the original IR may not be accurate.

    // Currently, ARM will use the underlying IR to calculate gather/scatter

    // instruction cost.

    const Value *Ptr = getLoadStorePointerOperand(&Ingredient);

    assert(!Reverse &&

           "Inconsecutive memory access should not have the order.");

    return Ctx.TTI.getAddressComputationCost(Ty) +

           Ctx.TTI.getGatherScatterOpCost(Ingredient.getOpcode(), Ty, Ptr,

                                          IsMasked, Alignment, Ctx.CostKind,

                                          &Ingredient);

  }


  InstructionCost Cost = 0;

  if (IsMasked) {

    Cost += Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,

                                          AS, Ctx.CostKind);

  } else {

    TTI::OperandValueInfo OpInfo =

        Ctx.TTI.getOperandInfo(Ingredient.getOperand(0));

    Cost += Ctx.TTI.getMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment, AS,

                                    Ctx.CostKind, OpInfo, &Ingredient);

  }

  if (!Reverse)

    return Cost;


  return Cost +=

         Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,

                                cast<VectorType>(Ty), {}, Ctx.CostKind, 0);

}


void VPWidenLoadRecipe::execute(VPTransformState &State) {

  auto *LI = cast<LoadInst>(&Ingredient);


  Type *ScalarDataTy = getLoadStoreType(&Ingredient);

  auto *DataTy = VectorType::get(ScalarDataTy, State.VF);

  const Align Alignment = getLoadStoreAlignment(&Ingredient);

  bool CreateGather = !isConsecutive();


  auto &Builder = State.Builder;

  State.setDebugLocFrom(getDebugLoc());

  Value *Mask = nullptr;

  if (auto *VPMask = getMask()) {

    // Mask reversal is only needed for non-all-one (null) masks, as reverse

    // of a null all-one mask is a null mask.

    Mask = State.get(VPMask);

    if (isReverse())

      Mask = Builder.CreateVectorReverse(Mask, "reverse");

  }


  Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateGather);

  Value *NewLI;

  if (CreateGather) {

    NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,

                                       "wide.masked.gather");

  } else if (Mask) {

    NewLI =

        Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,

                                 PoisonValue::get(DataTy), "wide.masked.load");

  } else {

    NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");

  }

  // Add metadata to the load, but setVectorValue to the reverse shuffle.

  State.addMetadata(NewLI, LI);

  if (Reverse)

    NewLI = Builder.CreateVectorReverse(NewLI, "reverse");

  State.set(this, NewLI);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,

                              VPSlotTracker &SlotTracker) const {

  O << Indent << "WIDEN ";

  printAsOperand(O, SlotTracker);

  O << " = load ";

  printOperands(O, SlotTracker);

}

#endif


/// Use all-true mask for reverse rather than actual mask, as it avoids a

/// dependence w/o affecting the result.

static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,

                                     Value *EVL, const Twine &Name) {

  VectorType *ValTy = cast<VectorType>(Operand->getType());

  Value *AllTrueMask =

      Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());

  return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,

                                 {Operand, AllTrueMask, EVL}, nullptr, Name);

}


void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {

  auto *LI = cast<LoadInst>(&Ingredient);


  Type *ScalarDataTy = getLoadStoreType(&Ingredient);

  auto *DataTy = VectorType::get(ScalarDataTy, State.VF);

  const Align Alignment = getLoadStoreAlignment(&Ingredient);

  bool CreateGather = !isConsecutive();


  auto &Builder = State.Builder;

  State.setDebugLocFrom(getDebugLoc());

  CallInst *NewLI;

  Value *EVL = State.get(getEVL(), VPLane(0));

  Value *Addr = State.get(getAddr(), !CreateGather);

  Value *Mask = nullptr;

  if (VPValue *VPMask = getMask()) {

    Mask = State.get(VPMask);

    if (isReverse())

      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");

  } else {

    Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());

  }


  if (CreateGather) {

    NewLI =

        Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},

                                nullptr, "wide.masked.gather");

  } else {

    VectorBuilder VBuilder(Builder);

    VBuilder.setEVL(EVL).setMask(Mask);

    NewLI = cast<CallInst>(VBuilder.createVectorInstruction(

        Instruction::Load, DataTy, Addr, "vp.op.load"));

  }

  NewLI->addParamAttr(

      0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));

  State.addMetadata(NewLI, LI);

  Instruction *Res = NewLI;

  if (isReverse())

    Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");

  State.set(this, Res);

}


InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,

                                                  VPCostContext &Ctx) const {

  if (!Consecutive || IsMasked)

    return VPWidenMemoryRecipe::computeCost(VF, Ctx);


  // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost()

  // here because the EVL recipes using EVL to replace the tail mask. But in the

  // legacy model, it will always calculate the cost of mask.

  // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we

  // don't need to compare to the legacy cost model.

  Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);

  const Align Alignment =

      getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));

  unsigned AS =

      getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));

  InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(

      Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);

  if (!Reverse)

    return Cost;


  return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,

                                       cast<VectorType>(Ty), {}, Ctx.CostKind,

                                       0);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,

                                 VPSlotTracker &SlotTracker) const {

  O << Indent << "WIDEN ";

  printAsOperand(O, SlotTracker);

  O << " = vp.load ";

  printOperands(O, SlotTracker);

}

#endif


void VPWidenStoreRecipe::execute(VPTransformState &State) {

  auto *SI = cast<StoreInst>(&Ingredient);


  VPValue *StoredVPValue = getStoredValue();

  bool CreateScatter = !isConsecutive();

  const Align Alignment = getLoadStoreAlignment(&Ingredient);


  auto &Builder = State.Builder;

  State.setDebugLocFrom(getDebugLoc());


  Value *Mask = nullptr;

  if (auto *VPMask = getMask()) {

    // Mask reversal is only needed for non-all-one (null) masks, as reverse

    // of a null all-one mask is a null mask.

    Mask = State.get(VPMask);

    if (isReverse())

      Mask = Builder.CreateVectorReverse(Mask, "reverse");

  }


  Value *StoredVal = State.get(StoredVPValue);

  if (isReverse()) {

    // If we store to reverse consecutive memory locations, then we need

    // to reverse the order of elements in the stored value.

    StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");

    // We don't want to update the value in the map as it might be used in

    // another expression. So don't call resetVectorValue(StoredVal).

  }

  Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);

  Instruction *NewSI = nullptr;

  if (CreateScatter)

    NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);

  else if (Mask)

    NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);

  else

    NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);

  State.addMetadata(NewSI, SI);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,

                               VPSlotTracker &SlotTracker) const {

  O << Indent << "WIDEN store ";

  printOperands(O, SlotTracker);

}

#endif


void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {

  auto *SI = cast<StoreInst>(&Ingredient);


  VPValue *StoredValue = getStoredValue();

  bool CreateScatter = !isConsecutive();

  const Align Alignment = getLoadStoreAlignment(&Ingredient);


  auto &Builder = State.Builder;

  State.setDebugLocFrom(getDebugLoc());


  CallInst *NewSI = nullptr;

  Value *StoredVal = State.get(StoredValue);

  Value *EVL = State.get(getEVL(), VPLane(0));

  if (isReverse())

    StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");

  Value *Mask = nullptr;

  if (VPValue *VPMask = getMask()) {

    Mask = State.get(VPMask);

    if (isReverse())

      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");

  } else {

    Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());

  }

  Value *Addr = State.get(getAddr(), !CreateScatter);

  if (CreateScatter) {

    NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),

                                    Intrinsic::vp_scatter,

                                    {StoredVal, Addr, Mask, EVL});

  } else {

    VectorBuilder VBuilder(Builder);

    VBuilder.setEVL(EVL).setMask(Mask);

    NewSI = cast<CallInst>(VBuilder.createVectorInstruction(

        Instruction::Store, Type::getVoidTy(EVL->getContext()),

        {StoredVal, Addr}));

  }

  NewSI->addParamAttr(

      1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));

  State.addMetadata(NewSI, SI);

}


InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,

                                                   VPCostContext &Ctx) const {

  if (!Consecutive || IsMasked)

    return VPWidenMemoryRecipe::computeCost(VF, Ctx);


  // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost()

  // here because the EVL recipes using EVL to replace the tail mask. But in the

  // legacy model, it will always calculate the cost of mask.

  // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we

  // don't need to compare to the legacy cost model.

  Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);

  const Align Alignment =

      getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));

  unsigned AS =

      getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));

  InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(

      Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);

  if (!Reverse)

    return Cost;


  return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,

                                       cast<VectorType>(Ty), {}, Ctx.CostKind,

                                       0);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent,

                                  VPSlotTracker &SlotTracker) const {

  O << Indent << "WIDEN vp.store ";

  printOperands(O, SlotTracker);

}

#endif


static Value *createBitOrPointerCast(IRBuilderBase &Builder, Value *V,

                                     VectorType *DstVTy, const DataLayout &DL) {

  // Verify that V is a vector type with same number of elements as DstVTy.

  auto VF = DstVTy->getElementCount();

  auto *SrcVecTy = cast<VectorType>(V->getType());

  assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");

  Type *SrcElemTy = SrcVecTy->getElementType();

  Type *DstElemTy = DstVTy->getElementType();

  assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&

         "Vector elements must have same size");


  // Do a direct cast if element types are castable.

  if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {

    return Builder.CreateBitOrPointerCast(V, DstVTy);

  }

  // V cannot be directly casted to desired vector type.

  // May happen when V is a floating point vector but DstVTy is a vector of

  // pointers or vice-versa. Handle this using a two-step bitcast using an

  // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.

  assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&

         "Only one type should be a pointer type");

  assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&

         "Only one type should be a floating point type");

  Type *IntTy =

      IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));

  auto *VecIntTy = VectorType::get(IntTy, VF);

  Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);

  return Builder.CreateBitOrPointerCast(CastVal, DstVTy);

}


/// Return a vector containing interleaved elements from multiple

/// smaller input vectors.

static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,

                                const Twine &Name) {

  unsigned Factor = Vals.size();

  assert(Factor > 1 && "Tried to interleave invalid number of vectors");


  VectorType *VecTy = cast<VectorType>(Vals[0]->getType());

#ifndef NDEBUG

  for (Value *Val : Vals)

    assert(Val->getType() == VecTy && "Tried to interleave mismatched types");

#endif


  // Scalable vectors cannot use arbitrary shufflevectors (only splats), so

  // must use intrinsics to interleave.

  if (VecTy->isScalableTy()) {

    VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy);

    return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,

                                   Vals,

                                   /*FMFSource=*/nullptr, Name);

  }


  // Fixed length. Start by concatenating all vectors into a wide vector.

  Value *WideVec = concatenateVectors(Builder, Vals);


  // Interleave the elements into the wide vector.

  const unsigned NumElts = VecTy->getElementCount().getFixedValue();

  return Builder.CreateShuffleVector(

      WideVec, createInterleaveMask(NumElts, Factor), Name);

}


// Try to vectorize the interleave group that \p Instr belongs to.

//

// E.g. Translate following interleaved load group (factor = 3):

//   for (i = 0; i < N; i+=3) {

//     R = Pic[i];             // Member of index 0

//     G = Pic[i+1];           // Member of index 1

//     B = Pic[i+2];           // Member of index 2

//     ... // do something to R, G, B

//   }

// To:

//   %wide.vec = load <12 x i32>                       ; Read 4 tuples of R,G,B

//   %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9>   ; R elements

//   %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10>  ; G elements

//   %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11>  ; B elements

//

// Or translate following interleaved store group (factor = 3):

//   for (i = 0; i < N; i+=3) {

//     ... do something to R, G, B

//     Pic[i]   = R;           // Member of index 0

//     Pic[i+1] = G;           // Member of index 1

//     Pic[i+2] = B;           // Member of index 2

//   }

// To:

//   %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>

//   %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>

//   %interleaved.vec = shuffle %R_G.vec, %B_U.vec,

//        <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>    ; Interleave R,G,B elements

//   store <12 x i32> %interleaved.vec              ; Write 4 tuples of R,G,B

void VPInterleaveRecipe::execute(VPTransformState &State) {

  assert(!State.Lane && "Interleave group being replicated.");

  const InterleaveGroup<Instruction> *Group = IG;

  Instruction *Instr = Group->getInsertPos();


  // Prepare for the vector type of the interleaved load/store.

  Type *ScalarTy = getLoadStoreType(Instr);

  unsigned InterleaveFactor = Group->getFactor();

  auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);


  // TODO: extend the masked interleaved-group support to reversed access.

  VPValue *BlockInMask = getMask();

  assert((!BlockInMask || !Group->isReverse()) &&

         "Reversed masked interleave-group not supported.");


  VPValue *Addr = getAddr();

  Value *ResAddr = State.get(Addr, VPLane(0));

  if (auto *I = dyn_cast<Instruction>(ResAddr))

    State.setDebugLocFrom(I->getDebugLoc());


  // If the group is reverse, adjust the index to refer to the last vector lane

  // instead of the first. We adjust the index from the first vector lane,

  // rather than directly getting the pointer for lane VF - 1, because the

  // pointer operand of the interleaved access is supposed to be uniform.

  if (Group->isReverse()) {

    Value *RuntimeVF =

        getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);

    Value *Index =

        State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));

    Index = State.Builder.CreateMul(Index,

                                    State.Builder.getInt32(Group->getFactor()));

    Index = State.Builder.CreateNeg(Index);


    bool InBounds = false;

    if (auto *Gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts()))

      InBounds = Gep->isInBounds();

    ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds);

  }


  State.setDebugLocFrom(Instr->getDebugLoc());

  Value *PoisonVec = PoisonValue::get(VecTy);


  auto CreateGroupMask = [&BlockInMask, &State,

                          &InterleaveFactor](Value *MaskForGaps) -> Value * {

    if (State.VF.isScalable()) {

      assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");

      assert(InterleaveFactor == 2 &&

             "Unsupported deinterleave factor for scalable vectors");

      auto *ResBlockInMask = State.get(BlockInMask);

      SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask};

      auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),

                                     State.VF.getKnownMinValue() * 2, true);

      return State.Builder.CreateIntrinsic(

          MaskTy, Intrinsic::vector_interleave2, Ops,

          /*FMFSource=*/nullptr, "interleaved.mask");

    }


    if (!BlockInMask)

      return MaskForGaps;


    Value *ResBlockInMask = State.get(BlockInMask);

    Value *ShuffledMask = State.Builder.CreateShuffleVector(

        ResBlockInMask,

        createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),

        "interleaved.mask");

    return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,

                                                   ShuffledMask, MaskForGaps)

                       : ShuffledMask;

  };


  const DataLayout &DL = Instr->getDataLayout();

  // Vectorize the interleaved load group.

  if (isa<LoadInst>(Instr)) {

    Value *MaskForGaps = nullptr;

    if (NeedsMaskForGaps) {

      MaskForGaps = createBitMaskForGaps(State.Builder,

                                         State.VF.getKnownMinValue(), *Group);

      assert(MaskForGaps && "Mask for Gaps is required but it is null");

    }


    Instruction *NewLoad;

    if (BlockInMask || MaskForGaps) {

      Value *GroupMask = CreateGroupMask(MaskForGaps);

      NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,

                                               Group->getAlign(), GroupMask,

                                               PoisonVec, "wide.masked.vec");

    } else

      NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,

                                                Group->getAlign(), "wide.vec");

    Group->addMetadata(NewLoad);


    ArrayRef<VPValue *> VPDefs = definedValues();

    const DataLayout &DL = State.CFG.PrevBB->getDataLayout();

    if (VecTy->isScalableTy()) {

      assert(InterleaveFactor == 2 &&

             "Unsupported deinterleave factor for scalable vectors");


        // Scalable vectors cannot use arbitrary shufflevectors (only splats),

        // so must use intrinsics to deinterleave.

      Value *DI = State.Builder.CreateIntrinsic(

          Intrinsic::vector_deinterleave2, VecTy, NewLoad,

          /*FMFSource=*/nullptr, "strided.vec");

      unsigned J = 0;

      for (unsigned I = 0; I < InterleaveFactor; ++I) {

        Instruction *Member = Group->getMember(I);


        if (!Member)

          continue;


        Value *StridedVec = State.Builder.CreateExtractValue(DI, I);

        // If this member has different type, cast the result type.

        if (Member->getType() != ScalarTy) {

          VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);

          StridedVec =

              createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);

        }


        if (Group->isReverse())

          StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");


        State.set(VPDefs[J], StridedVec);

        ++J;

      }


      return;

    }


    // For each member in the group, shuffle out the appropriate data from the

    // wide loads.

    unsigned J = 0;

    for (unsigned I = 0; I < InterleaveFactor; ++I) {

      Instruction *Member = Group->getMember(I);


      // Skip the gaps in the group.

      if (!Member)

        continue;


      auto StrideMask =

          createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());

      Value *StridedVec =

          State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec");


      // If this member has different type, cast the result type.

      if (Member->getType() != ScalarTy) {

        assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");

        VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);

        StridedVec =

            createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);

      }


      if (Group->isReverse())

        StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");


      State.set(VPDefs[J], StridedVec);

      ++J;

    }

    return;

  }


  // The sub vector type for current instruction.

  auto *SubVT = VectorType::get(ScalarTy, State.VF);


  // Vectorize the interleaved store group.

  Value *MaskForGaps =

      createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);

  assert((!MaskForGaps || !State.VF.isScalable()) &&

         "masking gaps for scalable vectors is not yet supported.");

  ArrayRef<VPValue *> StoredValues = getStoredValues();

  // Collect the stored vector from each member.

  SmallVector<Value *, 4> StoredVecs;

  unsigned StoredIdx = 0;

  for (unsigned i = 0; i < InterleaveFactor; i++) {

    assert((Group->getMember(i) || MaskForGaps) &&

           "Fail to get a member from an interleaved store group");

    Instruction *Member = Group->getMember(i);


    // Skip the gaps in the group.

    if (!Member) {

      Value *Undef = PoisonValue::get(SubVT);

      StoredVecs.push_back(Undef);

      continue;

    }


    Value *StoredVec = State.get(StoredValues[StoredIdx]);

    ++StoredIdx;


    if (Group->isReverse())

      StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");


    // If this member has different type, cast it to a unified type.


    if (StoredVec->getType() != SubVT)

      StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);


    StoredVecs.push_back(StoredVec);

  }


  // Interleave all the smaller vectors into one wider vector.

  Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");

  Instruction *NewStoreInstr;

  if (BlockInMask || MaskForGaps) {

    Value *GroupMask = CreateGroupMask(MaskForGaps);

    NewStoreInstr = State.Builder.CreateMaskedStore(

        IVec, ResAddr, Group->getAlign(), GroupMask);

  } else

    NewStoreInstr =

        State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());


  Group->addMetadata(NewStoreInstr);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,

                               VPSlotTracker &SlotTracker) const {

  O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";

  IG->getInsertPos()->printAsOperand(O, false);

  O << ", ";

  getAddr()->printAsOperand(O, SlotTracker);

  VPValue *Mask = getMask();

  if (Mask) {

    O << ", ";

    Mask->printAsOperand(O, SlotTracker);

  }


  unsigned OpIdx = 0;

  for (unsigned i = 0; i < IG->getFactor(); ++i) {

    if (!IG->getMember(i))

      continue;

    if (getNumStoreOperands() > 0) {

      O << "\n" << Indent << "  store ";

      getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);

      O << " to index " << i;

    } else {

      O << "\n" << Indent << "  ";

      getVPValue(OpIdx)->printAsOperand(O, SlotTracker);

      O << " = load from index " << i;

    }

    ++OpIdx;

  }

}

#endif


InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF,

                                                VPCostContext &Ctx) const {

  Instruction *InsertPos = getInsertPos();

  // Find the VPValue index of the interleave group. We need to skip gaps.

  unsigned InsertPosIdx = 0;

  for (unsigned Idx = 0; IG->getFactor(); ++Idx)

    if (auto *Member = IG->getMember(Idx)) {

      if (Member == InsertPos)

        break;

      InsertPosIdx++;

    }

  Type *ValTy = Ctx.Types.inferScalarType(

      getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)

                                : getStoredValues()[InsertPosIdx]);

  auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));

  unsigned AS = getLoadStoreAddressSpace(InsertPos);


  unsigned InterleaveFactor = IG->getFactor();

  auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);


  // Holds the indices of existing members in the interleaved group.

  SmallVector<unsigned, 4> Indices;

  for (unsigned IF = 0; IF < InterleaveFactor; IF++)

    if (IG->getMember(IF))

      Indices.push_back(IF);


  // Calculate the cost of the whole interleaved group.

  InstructionCost Cost = Ctx.TTI.getInterleavedMemoryOpCost(

      InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices,

      IG->getAlign(), AS, Ctx.CostKind, getMask(), NeedsMaskForGaps);


  if (!IG->isReverse())

    return Cost;


  return Cost + IG->getNumMembers() *

                    Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,

                                           VectorTy, std::nullopt, Ctx.CostKind,

                                           0);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,

                                   VPSlotTracker &SlotTracker) const {

  O << Indent << "EMIT ";

  printAsOperand(O, SlotTracker);

  O << " = CANONICAL-INDUCTION ";

  printOperands(O, SlotTracker);

}

#endif


bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {

  return IsScalarAfterVectorization &&

         (!IsScalable || vputils::onlyFirstLaneUsed(this));

}


void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {

  assert(getInductionDescriptor().getKind() ==

             InductionDescriptor::IK_PtrInduction &&

         "Not a pointer induction according to InductionDescriptor!");

  assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&

         "Unexpected type.");

  assert(!onlyScalarsGenerated(State.VF.isScalable()) &&

         "Recipe should have been replaced");


  unsigned CurrentPart = getUnrollPart(*this);


  // Build a pointer phi

  Value *ScalarStartValue = getStartValue()->getLiveInIRValue();

  Type *ScStValueType = ScalarStartValue->getType();


  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  PHINode *NewPointerPhi = nullptr;

  if (CurrentPart == 0) {

    auto *IVR = cast<VPHeaderPHIRecipe>(&getParent()

                                             ->getPlan()

                                             ->getVectorLoopRegion()

                                             ->getEntryBasicBlock()

                                             ->front());

    PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, /*IsScalar*/ true));

    NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi",

                                    CanonicalIV->getIterator());

    NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);

    NewPointerPhi->setDebugLoc(getDebugLoc());

  } else {

    // The recipe has been unrolled. In that case, fetch the single pointer phi

    // shared among all unrolled parts of the recipe.

    auto *GEP =

        cast<GetElementPtrInst>(State.get(getFirstUnrolledPartOperand()));

    NewPointerPhi = cast<PHINode>(GEP->getPointerOperand());

  }


  // A pointer induction, performed by using a gep

  BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();

  Value *ScalarStepValue = State.get(getStepValue(), VPLane(0));

  Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue());

  Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);

  // Add induction update using an incorrect block temporarily. The phi node

  // will be fixed after VPlan execution. Note that at this point the latch

  // block cannot be used, as it does not exist yet.

  // TODO: Model increment value in VPlan, by turning the recipe into a

  // multi-def and a subclass of VPHeaderPHIRecipe.

  if (CurrentPart == 0) {

    // The recipe represents the first part of the pointer induction. Create the

    // GEP to increment the phi across all unrolled parts.

    unsigned UF = CurrentPart == 0 ? getParent()->getPlan()->getUF() : 1;

    Value *NumUnrolledElems =

        State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, UF));


    Value *InductionGEP = GetElementPtrInst::Create(

        State.Builder.getInt8Ty(), NewPointerPhi,

        State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",

        InductionLoc);


    NewPointerPhi->addIncoming(InductionGEP, VectorPH);

  }


  // Create actual address geps that use the pointer phi as base and a

  // vectorized version of the step value (<step*0, ..., step*N>) as offset.

  Type *VecPhiType = VectorType::get(PhiType, State.VF);

  Value *StartOffsetScalar = State.Builder.CreateMul(

      RuntimeVF, ConstantInt::get(PhiType, CurrentPart));

  Value *StartOffset =

      State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);

  // Create a vector of consecutive numbers from zero to VF.

  StartOffset = State.Builder.CreateAdd(

      StartOffset, State.Builder.CreateStepVector(VecPhiType));


  assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) &&

         "scalar step must be the same across all parts");

  Value *GEP = State.Builder.CreateGEP(

      State.Builder.getInt8Ty(), NewPointerPhi,

      State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(

                                               State.VF, ScalarStepValue)),

      "vector.gep");

  State.set(this, GEP);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,

                                          VPSlotTracker &SlotTracker) const {

  assert((getNumOperands() == 2 || getNumOperands() == 4) &&

         "unexpected number of operands");

  O << Indent << "EMIT ";

  printAsOperand(O, SlotTracker);

  O << " = WIDEN-POINTER-INDUCTION ";

  getStartValue()->printAsOperand(O, SlotTracker);

  O << ", ";

  getStepValue()->printAsOperand(O, SlotTracker);

  if (getNumOperands() == 4) {

    O << ", ";

    getOperand(2)->printAsOperand(O, SlotTracker);

    O << ", ";

    getOperand(3)->printAsOperand(O, SlotTracker);

  }

}

#endif


void VPExpandSCEVRecipe::execute(VPTransformState &State) {

  assert(!State.Lane && "cannot be used in per-lane");

  if (State.ExpandedSCEVs.contains(Expr)) {

    // SCEV Expr has already been expanded, result must already be set. At the

    // moment we have to execute the entry block twice (once before skeleton

    // creation to get expanded SCEVs used by the skeleton and once during

    // regular VPlan execution).

    State.Builder.SetInsertPoint(State.CFG.VPBB2IRBB[getParent()]);

    assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] &&

           "Results must match");

    return;

  }


  const DataLayout &DL = State.CFG.PrevBB->getDataLayout();

  SCEVExpander Exp(SE, DL, "induction");


  Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),

                                 &*State.Builder.GetInsertPoint());

  State.ExpandedSCEVs[Expr] = Res;

  State.set(this, Res, VPLane(0));

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,

                               VPSlotTracker &SlotTracker) const {

  O << Indent << "EMIT ";

  printAsOperand(O, SlotTracker);

  O << " = EXPAND SCEV " << *Expr;

}

#endif


void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {

  Value *CanonicalIV = State.get(getOperand(0), /*IsScalar*/ true);

  Type *STy = CanonicalIV->getType();

  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());

  ElementCount VF = State.VF;

  Value *VStart = VF.isScalar()

                      ? CanonicalIV

                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");

  Value *VStep = createStepForVF(Builder, STy, VF, getUnrollPart(*this));

  if (VF.isVector()) {

    VStep = Builder.CreateVectorSplat(VF, VStep);

    VStep =

        Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));

  }

  Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");

  State.set(this, CanonicalVectorIV);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,

                                     VPSlotTracker &SlotTracker) const {

  O << Indent << "EMIT ";

  printAsOperand(O, SlotTracker);

  O << " = WIDEN-CANONICAL-INDUCTION ";

  printOperands(O, SlotTracker);

}

#endif


void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {

  auto &Builder = State.Builder;

  // Create a vector from the initial value.

  auto *VectorInit = getStartValue()->getLiveInIRValue();


  Type *VecTy = State.VF.isScalar()

                    ? VectorInit->getType()

                    : VectorType::get(VectorInit->getType(), State.VF);


  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  if (State.VF.isVector()) {

    auto *IdxTy = Builder.getInt32Ty();

    auto *One = ConstantInt::get(IdxTy, 1);

    IRBuilder<>::InsertPointGuard Guard(Builder);

    Builder.SetInsertPoint(VectorPH->getTerminator());

    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);

    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);

    VectorInit = Builder.CreateInsertElement(

        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");

  }


  // Create a phi node for the new recurrence.

  PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");

  Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());

  Phi->addIncoming(VectorInit, VectorPH);

  State.set(this, Phi);

}


InstructionCost

VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,

                                             VPCostContext &Ctx) const {

  if (VF.isScalar())

    return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);


  if (VF.isScalable() && VF.getKnownMinValue() == 1)

    return InstructionCost::getInvalid();


  SmallVector<int> Mask(VF.getKnownMinValue());

  std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);

  Type *VectorTy =

      toVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);


  return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Splice,

                                cast<VectorType>(VectorTy), Mask, Ctx.CostKind,

                                VF.getKnownMinValue() - 1);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,

                                            VPSlotTracker &SlotTracker) const {

  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";

  printAsOperand(O, SlotTracker);

  O << " = phi ";

  printOperands(O, SlotTracker);

}

#endif


void VPReductionPHIRecipe::execute(VPTransformState &State) {

  auto &Builder = State.Builder;


  // If this phi is fed by a scaled reduction then it should output a

  // vector with fewer elements than the VF.

  ElementCount VF = State.VF.divideCoefficientBy(VFScaleFactor);


  // Reductions do not have to start at zero. They can start with

  // any loop invariant values.

  VPValue *StartVPV = getStartValue();

  Value *StartV = StartVPV->getLiveInIRValue();


  // In order to support recurrences we need to be able to vectorize Phi nodes.

  // Phi nodes have cycles, so we need to vectorize them in two stages. This is

  // stage #1: We create a new vector PHI node with no incoming edges. We'll use

  // this value when we vectorize all of the instructions that use the PHI.

  bool ScalarPHI = State.VF.isScalar() || IsInLoop;

  Type *VecTy =

      ScalarPHI ? StartV->getType() : VectorType::get(StartV->getType(), VF);


  BasicBlock *HeaderBB = State.CFG.PrevBB;

  assert(State.CurrentParentLoop->getHeader() == HeaderBB &&

         "recipe must be in the vector loop header");

  auto *Phi = PHINode::Create(VecTy, 2, "vec.phi");

  Phi->insertBefore(HeaderBB->getFirstInsertionPt());

  State.set(this, Phi, IsInLoop);


  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);


  Value *Iden = nullptr;

  RecurKind RK = RdxDesc.getRecurrenceKind();

  unsigned CurrentPart = getUnrollPart(*this);


  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||

      RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {

    // MinMax and AnyOf reductions have the start value as their identity.

    if (ScalarPHI) {

      Iden = StartV;

    } else {

      IRBuilderBase::InsertPointGuard IPBuilder(Builder);

      Builder.SetInsertPoint(VectorPH->getTerminator());

      StartV = Iden = State.get(StartVPV);

    }

  } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {

    // [I|F]FindLastIV will use a sentinel value to initialize the reduction

    // phi or the resume value from the main vector loop when vectorizing the

    // epilogue loop. In the exit block, ComputeReductionResult will generate

    // checks to verify if the reduction result is the sentinel value. If the

    // result is the sentinel value, it will be corrected back to the start

    // value.

    // TODO: The sentinel value is not always necessary. When the start value is

    // a constant, and smaller than the start value of the induction variable,

    // the start value can be directly used to initialize the reduction phi.

    Iden = StartV;

    if (!ScalarPHI) {

      IRBuilderBase::InsertPointGuard IPBuilder(Builder);

      Builder.SetInsertPoint(VectorPH->getTerminator());

      StartV = Iden = Builder.CreateVectorSplat(State.VF, Iden);

    }

  } else {

    Iden = llvm::getRecurrenceIdentity(RK, VecTy->getScalarType(),

                                       RdxDesc.getFastMathFlags());


    if (!ScalarPHI) {

      if (CurrentPart == 0) {

        // Create start and identity vector values for the reduction in the

        // preheader.

        // TODO: Introduce recipes in VPlan preheader to create initial values.

        Iden = Builder.CreateVectorSplat(VF, Iden);

        IRBuilderBase::InsertPointGuard IPBuilder(Builder);

        Builder.SetInsertPoint(VectorPH->getTerminator());

        Constant *Zero = Builder.getInt32(0);

        StartV = Builder.CreateInsertElement(Iden, StartV, Zero);

      } else {

        Iden = Builder.CreateVectorSplat(VF, Iden);

      }

    }

  }


  Phi = cast<PHINode>(State.get(this, IsInLoop));

  Value *StartVal = (CurrentPart == 0) ? StartV : Iden;

  Phi->addIncoming(StartVal, VectorPH);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,

                                 VPSlotTracker &SlotTracker) const {

  O << Indent << "WIDEN-REDUCTION-PHI ";


  printAsOperand(O, SlotTracker);

  O << " = phi ";

  printOperands(O, SlotTracker);

  if (VFScaleFactor != 1)

    O << " (VF scaled by 1/" << VFScaleFactor << ")";

}

#endif


void VPWidenPHIRecipe::execute(VPTransformState &State) {

  assert(EnableVPlanNativePath &&

         "Non-native vplans are not expected to have VPWidenPHIRecipes.");


  State.setDebugLocFrom(getDebugLoc());

  Value *Op0 = State.get(getOperand(0));

  Type *VecTy = Op0->getType();

  Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");

  State.set(this, VecPhi);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,

                             VPSlotTracker &SlotTracker) const {

  O << Indent << "WIDEN-PHI ";


  auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());

  // Unless all incoming values are modeled in VPlan  print the original PHI

  // directly.

  // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming

  // values as VPValues.

  if (getNumOperands() != OriginalPhi->getNumOperands()) {

    O << VPlanIngredient(OriginalPhi);

    return;

  }


  printAsOperand(O, SlotTracker);

  O << " = phi ";

  printOperands(O, SlotTracker);

}

#endif


// TODO: It would be good to use the existing VPWidenPHIRecipe instead and

// remove VPActiveLaneMaskPHIRecipe.

void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  Value *StartMask = State.get(getOperand(0));

  PHINode *Phi =

      State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");

  Phi->addIncoming(StartMask, VectorPH);

  Phi->setDebugLoc(getDebugLoc());

  State.set(this, Phi);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,

                                      VPSlotTracker &SlotTracker) const {

  O << Indent << "ACTIVE-LANE-MASK-PHI ";


  printAsOperand(O, SlotTracker);

  O << " = phi ";

  printOperands(O, SlotTracker);

}

#endif


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,

                                  VPSlotTracker &SlotTracker) const {

  O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";


  printAsOperand(O, SlotTracker);

  O << " = phi ";

  printOperands(O, SlotTracker);

}

#endif


void VPScalarPHIRecipe::execute(VPTransformState &State) {

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  Value *Start = State.get(getStartValue(), VPLane(0));

  PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name);

  Phi->addIncoming(Start, VectorPH);

  Phi->setDebugLoc(getDebugLoc());

  State.set(this, Phi, /*IsScalar=*/true);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPScalarPHIRecipe::print(raw_ostream &O, const Twine &Indent,

                              VPSlotTracker &SlotTracker) const {

  O << Indent << "SCALAR-PHI ";

  printAsOperand(O, SlotTracker);

  O << " = phi ";

  printOperands(O, SlotTracker);

}

#endif

Arguments
AMDGPU Lower Kernel Arguments
Definition: AMDGPULowerKernelArguments.cpp:504

Select
AMDGPU Register Bank Select
Definition: AMDGPURegBankSelect.cpp:71

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: ARMSLSHardening.cpp:73

BasicBlockUtils.h

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

Casting.h

CommandLine.h

RetTy
return RetTy
Definition: DeadArgumentElimination.cpp:361

Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition: DeadArgumentElimination.cpp:353

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition: Debug.h:106

Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:79

Name
std::string Name
Definition: ELFObjHandler.cpp:77

GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:170

IRBuilder.h

BasicBlock.h

Instruction.h

Type.h

Value.h

IVDescriptors.h

Instructions.h

Intrinsics.h

LoopInfo.h

LoopUtils.h

LoopVectorizationPlanner.h
This file provides a LoopVectorizationPlanner class.

ForceTargetInstructionCost
cl::opt< unsigned > ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing."))

I
#define I(x, y, z)
Definition: MD5.cpp:58

Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:74

getDebugLoc
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
Definition: MachineInstrBundle.cpp:109

Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition: RISCVRedundantCopyElimination.cpp:75

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

ScalarEvolutionExpander.h

SmallVector.h
This file defines the SmallVector class.

getType
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39

Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:77

Twine.h

VPlanAnalysis.h

VPlanHelpers.h
This file contains the declarations of different VPlan-related auxiliary helpers.

VPlanPatternMatch.h

createReverseEVL
static Instruction * createReverseEVL(IRBuilderBase &Builder, Value *Operand, Value *EVL, const Twine &Name)
Use all-true mask for reverse rather than actual mask, as it avoids a dependence w/o affecting the re...
Definition: VPlanRecipes.cpp:2634

interleaveVectors
static Value * interleaveVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vals, const Twine &Name)
Return a vector containing interleaved elements from multiple smaller input vectors.
Definition: VPlanRecipes.cpp:2870

createBitOrPointerCast
static Value * createBitOrPointerCast(IRBuilderBase &Builder, Value *V, VectorType *DstVTy, const DataLayout &DL)
Definition: VPlanRecipes.cpp:2838

ForceTargetInstructionCost
cl::opt< unsigned > ForceTargetInstructionCost

getStepVector
static Value * getStepVector(Value *Val, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, IRBuilderBase &Builder)
This function adds (0 * Step, 1 * Step, 2 * Step, ...) to each vector element of Val.
Definition: VPlanRecipes.cpp:1699

getGEPIndexTy
static Type * getGEPIndexTy(bool IsScalable, bool IsReverse, unsigned CurrentPart, IRBuilderBase &Builder)
Definition: VPlanRecipes.cpp:2060

getSignedIntOrFpConstant
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
Definition: VPlanRecipes.cpp:1745

VPlanUtils.h

VPlan.h
This file contains the declarations of the Vectorization Plan base classes:

VectorBuilder.h

RHS
Value * RHS
Definition: X86PartialReduction.cpp:74

IV
static const uint32_t IV[8]
Definition: blake3_impl.h:78

VectorType
Definition: ItaniumDemangle.h:1173

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41

llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168

llvm::Attribute::getWithAlignment
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition: Attributes.cpp:234

llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:61

llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:437

llvm::BasicBlock::getFirstNonPHIIt
InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:381

llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:481

llvm::BasicBlock::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296

llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177

llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:240

llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292

llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3016

llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Definition: Instructions.h:3072

llvm::BranchInst::setSuccessor
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Definition: Instructions.h:3109

llvm::CallBase::addParamAttr
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1494

llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1479

llvm::CastInst::isBitOrNoopPointerCastable
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
Definition: Instructions.cpp:3158

llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673

llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition: InstrTypes.h:706

llvm::CmpInst::ICMP_UGT
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696

llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698

llvm::CmpInst::getPredicateName
static StringRef getPredicateName(Predicate P)
Definition: Instructions.cpp:3583

llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:83

llvm::ConstantInt::getSigned
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:126

llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157

llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:42

llvm::DWARFExpression::Operation
This class represents an Operation in the Expression.
Definition: DWARFExpression.h:32

llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63

llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33

llvm::ElementCount
Definition: TypeSize.h:300

llvm::ElementCount::isVector
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:326

llvm::ElementCount::isScalar
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322

llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20

llvm::FastMathFlags::setAllowContract
void setAllowContract(bool B=true)
Definition: FMF.h:91

llvm::FastMathFlags::noSignedZeros
bool noSignedZeros() const
Definition: FMF.h:68

llvm::FastMathFlags::noInfs
bool noInfs() const
Definition: FMF.h:67

llvm::FastMathFlags::setAllowReciprocal
void setAllowReciprocal(bool B=true)
Definition: FMF.h:88

llvm::FastMathFlags::allowReciprocal
bool allowReciprocal() const
Definition: FMF.h:69

llvm::FastMathFlags::print
void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:271

llvm::FastMathFlags::setNoSignedZeros
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85

llvm::FastMathFlags::allowReassoc
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65

llvm::FastMathFlags::approxFunc
bool approxFunc() const
Definition: FMF.h:71

llvm::FastMathFlags::setNoNaNs
void setNoNaNs(bool B=true)
Definition: FMF.h:79

llvm::FastMathFlags::setAllowReassoc
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:76

llvm::FastMathFlags::noNaNs
bool noNaNs() const
Definition: FMF.h:66

llvm::FastMathFlags::setApproxFunc
void setApproxFunc(bool B=true)
Definition: FMF.h:94

llvm::FastMathFlags::setNoInfs
void setNoInfs(bool B=true)
Definition: FMF.h:82

llvm::FastMathFlags::allowContract
bool allowContract() const
Definition: FMF.h:70

llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:105

llvm::FunctionType::getParamType
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137

llvm::FunctionType::params
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:132

llvm::Function
Definition: Function.h:63

llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216

llvm::Function::willReturn
bool willReturn() const
Determine if the function will return.
Definition: Function.h:674

llvm::Function::doesNotThrow
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:607

llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221

llvm::GEPNoWrapFlags::hasNoUnsignedSignedWrap
bool hasNoUnsignedSignedWrap() const
Definition: GEPNoWrapFlags.h:64

llvm::GEPNoWrapFlags::hasNoUnsignedWrap
bool hasNoUnsignedWrap() const
Definition: GEPNoWrapFlags.h:65

llvm::GEPNoWrapFlags::isInBounds
bool isInBounds() const
Definition: GEPNoWrapFlags.h:63

llvm::GetElementPtrInst::Create
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition: Instructions.h:956

llvm::IRBuilderBase::FastMathFlagGuard
Definition: IRBuilder.h:416

llvm::IRBuilderBase::InsertPointGuard
Definition: IRBuilder.h:394

llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113

llvm::IRBuilderBase::getInt1
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:480

llvm::IRBuilderBase::CreateInsertElement
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2511

llvm::IRBuilderBase::getInt1Ty
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:530

llvm::IRBuilderBase::CreateSIToFP
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2106

llvm::IRBuilderBase::CreateExtractElement
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2499

llvm::IRBuilderBase::CreateAlignedLoad
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1815

llvm::IRBuilderBase::CreateZExtOrTrunc
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2051

llvm::IRBuilderBase::CreateVectorSplice
Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition: IRBuilder.cpp:1135

llvm::IRBuilderBase::CreateVectorSplat
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1163

llvm::IRBuilderBase::CreateExtractValue
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2555

llvm::IRBuilderBase::getTrue
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:485

llvm::IRBuilderBase::CreateMaskedLoad
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:546

llvm::IRBuilderBase::CreateSelect
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1053

llvm::IRBuilderBase::GetInsertPoint
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:194

llvm::IRBuilderBase::CreateSExt
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2045

llvm::IRBuilderBase::CreateFreeze
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2574

llvm::IRBuilderBase::getInt32Ty
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545

llvm::IRBuilderBase::CreatePtrAdd
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1987

llvm::IRBuilderBase::CreateCast
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition: IRBuilder.h:2186

llvm::IRBuilderBase::CreateUIToFP
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2093

llvm::IRBuilderBase::GetInsertBlock
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:193

llvm::IRBuilderBase::setFastMathFlags
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:330

llvm::IRBuilderBase::getInt64Ty
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:550

llvm::IRBuilderBase::CreateVectorReverse
Value * CreateVectorReverse(Value *V, const Twine &Name="")
Return a vector value that contains the vector V reversed.
Definition: IRBuilder.cpp:1119

llvm::IRBuilderBase::CreateFCmpFMF
Value * CreateFCmpFMF(CmpInst::Predicate P, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2398

llvm::IRBuilderBase::CreateGEP
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1874

llvm::IRBuilderBase::CreateNeg
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1733

llvm::IRBuilderBase::CreateOrReduce
CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:424

llvm::IRBuilderBase::saveIP
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:296

llvm::IRBuilderBase::CreateIntrinsic
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900

llvm::IRBuilderBase::getInt32
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:505

llvm::IRBuilderBase::CreateBitOrPointerCast
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2234

llvm::IRBuilderBase::CreateCmp
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2404

llvm::IRBuilderBase::CreatePHI
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2435

llvm::IRBuilderBase::CreateNot
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1757

llvm::IRBuilderBase::CreateICmpEQ
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2270

llvm::IRBuilderBase::CreateCountTrailingZeroElems
Value * CreateCountTrailingZeroElems(Type *ResTy, Value *Mask, bool ZeroIsPoison=true, const Twine &Name="")
Create a call to llvm.experimental_cttz_elts.
Definition: IRBuilder.h:1101

llvm::IRBuilderBase::CreateSub
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1387

llvm::IRBuilderBase::CreateCondBr
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1164

llvm::IRBuilderBase::CreateNAryOp
Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Definition: IRBuilder.cpp:968

llvm::IRBuilderBase::CreateZExt
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2033

llvm::IRBuilderBase::CreateShuffleVector
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2533

llvm::IRBuilderBase::getContext
LLVMContext & getContext() const
Definition: IRBuilder.h:195

llvm::IRBuilderBase::CreateMaskedStore
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:566

llvm::IRBuilderBase::CreateAdd
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1370

llvm::IRBuilderBase::CreateCall
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449

llvm::IRBuilderBase::CreateTrunc
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019

llvm::IRBuilderBase::getPtrTy
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:588

llvm::IRBuilderBase::CreateBinOp
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1671

llvm::IRBuilderBase::CreateLogicalAnd
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1688

llvm::IRBuilderBase::restoreIP
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:308

llvm::IRBuilderBase::SetInsertPoint
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:199

llvm::IRBuilderBase::CreateAlignedStore
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1834

llvm::IRBuilderBase::CreateICmp
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2380

llvm::IRBuilderBase::CreateFMul
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1614

llvm::IRBuilderBase::getInt8Ty
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:535

llvm::IRBuilderBase::CreateStepVector
Value * CreateStepVector(Type *DstType, const Twine &Name="")
Creates a vector of type DstType with the linear sequence <0, 1, ...>
Definition: IRBuilder.cpp:108

llvm::IRBuilderBase::CreateMul
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1404

llvm::IRBuilderBase::CreateMaskedScatter
CallInst * CreateMaskedScatter(Value *Val, Value *Ptrs, Align Alignment, Value *Mask=nullptr)
Create a call to Masked Scatter intrinsic.
Definition: IRBuilder.cpp:627

llvm::IRBuilderBase::CreateMaskedGather
CallInst * CreateMaskedGather(Type *Ty, Value *Ptrs, Align Alignment, Value *Mask=nullptr, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Gather intrinsic.
Definition: IRBuilder.cpp:596

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705

llvm::InductionDescriptor
A struct for saving information about induction variables.
Definition: IVDescriptors.h:334

llvm::InductionDescriptor::IK_PtrInduction
@ IK_PtrInduction
Pointer induction var. Step = C.
Definition: IVDescriptors.h:340

llvm::InsertElementInst
This instruction inserts a single (scalar) element into a VectorType value.
Definition: Instructions.h:1834

llvm::InsertElementInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition: Instructions.h:1862

llvm::InstructionCost
Definition: InstructionCost.h:29

llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:73

llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:79

llvm::Instruction
Definition: Instruction.h:68

llvm::Instruction::insertBefore
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:99

llvm::Instruction::isBinaryOp
bool isBinaryOp() const
Definition: Instruction.h:315

llvm::Instruction::eraseFromParent
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94

llvm::Instruction::getFastMathFlags
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
Definition: Instruction.cpp:651

llvm::Instruction::getOpcodeName
const char * getOpcodeName() const
Definition: Instruction.h:312

llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:310

llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:1008

llvm::Instruction::isUnaryOp
bool isUnaryOp() const
Definition: Instruction.h:314

llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:508

llvm::Instruction::CastOps
CastOps
Definition: Instruction.h:1022

llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311

llvm::InterleaveGroup
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:488

llvm::InterleaveGroup::getFactor
uint32_t getFactor() const
Definition: VectorUtils.h:504

llvm::InterleaveGroup::getMember
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:558

llvm::InterleaveGroup::isReverse
bool isReverse() const
Definition: VectorUtils.h:503

llvm::InterleaveGroup::getInsertPos
InstTy * getInsertPos() const
Definition: VectorUtils.h:574

llvm::InterleaveGroup::addMetadata
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
Definition: VectorUtils.cpp:1632

llvm::InterleaveGroup::getAlign
Align getAlign() const
Definition: VectorUtils.h:505

llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:119

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67

llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: GenericLoopInfo.h:90

llvm::MachineBasicBlock::print
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
Definition: MachineBasicBlock.cpp:345

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65

llvm::PHINode
Definition: Instructions.h:2600

llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2735

llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2635

llvm::PoisonValue::get
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878

llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77

llvm::RecurrenceDescriptor::getFastMathFlags
FastMathFlags getFastMathFlags() const
Definition: IVDescriptors.h:214

llvm::RecurrenceDescriptor::getOpcode
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
Definition: IVDescriptors.cpp:1130

llvm::RecurrenceDescriptor::getRecurrenceType
Type * getRecurrenceType() const
Returns the type of the recurrence.
Definition: IVDescriptors.h:264

llvm::RecurrenceDescriptor::getRecurrenceStartValue
TrackingVH< Value > getRecurrenceStartValue() const
Definition: IVDescriptors.h:216

llvm::RecurrenceDescriptor::isAnyOfRecurrenceKind
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
Definition: IVDescriptors.h:252

llvm::RecurrenceDescriptor::isFindLastIVRecurrenceKind
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
Definition: IVDescriptors.h:258

llvm::RecurrenceDescriptor::isSigned
bool isSigned() const
Returns true if all source operands of the recurrence are SExtInsts.
Definition: IVDescriptors.h:285

llvm::RecurrenceDescriptor::getRecurrenceKind
RecurKind getRecurrenceKind() const
Definition: IVDescriptors.h:210

llvm::RecurrenceDescriptor::IntermediateStore
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
Definition: IVDescriptors.h:304

llvm::RecurrenceDescriptor::isMinMaxRecurrenceKind
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
Definition: IVDescriptors.h:246

llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:63

llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:386

llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition: Instructions.h:1657

llvm::SlotTracker
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:698

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:413

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51

llvm::TargetTransformInfo::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:1067

llvm::TargetTransformInfo::getAddressComputationCost
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
Definition: TargetTransformInfo.cpp:1198

llvm::TargetTransformInfo::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:1125

llvm::TargetTransformInfo::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
Definition: TargetTransformInfo.cpp:1165

llvm::TargetTransformInfo::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:1177

llvm::TargetTransformInfo::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
Definition: TargetTransformInfo.cpp:1215

llvm::TargetTransformInfo::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:1039

llvm::TargetTransformInfo::getOperandInfo
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Definition: TargetTransformInfo.cpp:880

llvm::TargetTransformInfo::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF=FastMathFlags(), TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Definition: TargetTransformInfo.cpp:1224

llvm::TargetTransformInfo::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
Definition: TargetTransformInfo.cpp:940

llvm::TargetTransformInfo::OP_None
@ OP_None
Definition: TargetTransformInfo.h:1127

llvm::TargetTransformInfo::getMaskedMemoryOpCost
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Definition: TargetTransformInfo.cpp:1137

llvm::TargetTransformInfo::getShuffleCost
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask={}, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
Definition: TargetTransformInfo.cpp:976

llvm::TargetTransformInfo::getGatherScatterOpCost
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:1146

llvm::TargetTransformInfo::PR_SignExtend
@ PR_SignExtend
Definition: TargetTransformInfo.h:214

llvm::TargetTransformInfo::PR_ZeroExtend
@ PR_ZeroExtend
Definition: TargetTransformInfo.h:214

llvm::TargetTransformInfo::PR_None
@ PR_None
Definition: TargetTransformInfo.h:214

llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:289

llvm::TargetTransformInfo::getPartialReductionCost
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, PartialReductionExtendKind OpAExtend, PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp=std::nullopt) const
Definition: TargetTransformInfo.cpp:866

llvm::TargetTransformInfo::SK_Splice
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
Definition: TargetTransformInfo.h:1111

llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition: TargetTransformInfo.h:1100

llvm::TargetTransformInfo::getCallInstrCost
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
Definition: TargetTransformInfo.cpp:1185

llvm::TargetTransformInfo::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:1058

llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1389

llvm::TargetTransformInfo::CastContextHint::Reversed
@ Reversed
The cast is used with a reversed load/store.

llvm::TargetTransformInfo::CastContextHint::Masked
@ Masked
The cast is used with a masked load/store.

llvm::TargetTransformInfo::CastContextHint::None
@ None
The cast is not used with a load/store of any kind.

llvm::TargetTransformInfo::CastContextHint::Normal
@ Normal
The cast is used with a normal load/store.

llvm::TargetTransformInfo::CastContextHint::Interleave
@ Interleave
The cast is used with an interleaved load/store.

llvm::TargetTransformInfo::CastContextHint::GatherScatter
@ GatherScatter
The cast is used with a gather/scatter.

llvm::TargetTransformInfo::OK_UniformValue
@ OK_UniformValue
Definition: TargetTransformInfo.h:1120

llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition: TargetTransformInfo.h:1119

llvm::TruncInst
This class represents a truncation of integer types.
Definition: Instructions.h:4503

llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45

llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270

llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264

llvm::Type::getInt1Ty
static IntegerType * getInt1Ty(LLVMContext &C)

llvm::Type::getIntNTy
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)

llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)

llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128

llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184

llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237

llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136

llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139

llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355

llvm::User::value_op_end
value_op_iterator value_op_end()
Definition: User.h:309

llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:228

llvm::User::value_op_begin
value_op_iterator value_op_begin()
Definition: User.h:306

llvm::VPActiveLaneMaskPHIRecipe::execute
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
Definition: VPlanRecipes.cpp:3573

llvm::VPActiveLaneMaskPHIRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:3584

llvm::VPBasicBlock
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3200

llvm::VPBasicBlock::getRecipeList
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3253

llvm::VPBasicBlock::end
iterator end()
Definition: VPlan.h:3237

llvm::VPBasicBlock::insert
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3266

llvm::VPBlendRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:2184

llvm::VPBlendRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
Definition: VPlanRecipes.cpp:2169

llvm::VPBlendRecipe::getIncomingValue
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2187

llvm::VPBlendRecipe::getMask
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2192

llvm::VPBlendRecipe::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition: VPlan.h:2182

llvm::VPBlendRecipe::execute
void execute(VPTransformState &State) override
Generate the phi/select nodes.
Definition: VPlanRecipes.cpp:2132

llvm::VPBlendRecipe::isNormalized
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition: VPlan.h:2178

llvm::VPBlockBase
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:78

llvm::VPBlockBase::getParent
VPRegionBlock * getParent()
Definition: VPlan.h:170

llvm::VPBlockBase::getExitingBasicBlock
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:180

llvm::VPBlockBase::getPredecessors
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:201

llvm::VPBlockBase::getPlan
VPlan * getPlan()
Definition: VPlan.cpp:155

llvm::VPBlockBase::getEntryBasicBlock
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:160

llvm::VPBranchOnMaskRecipe::getMask
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2554

llvm::VPBranchOnMaskRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
Definition: VPlanRecipes.cpp:2478

llvm::VPBranchOnMaskRecipe::execute
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
Definition: VPlanRecipes.cpp:2457

llvm::VPBuilder
VPlan-based builder utility analogous to IRBuilder.
Definition: LoopVectorizationPlanner.h:46

llvm::VPBuilder::createNaryOp
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Definition: LoopVectorizationPlanner.h:149

llvm::VPCanonicalIVPHIRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:3210

llvm::VPDef
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:298

llvm::VPDef::dump
void dump() const
Dump the VPDef to stderr (for debugging).
Definition: VPlan.cpp:116

llvm::VPDef::getNumDefinedValues
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
Definition: VPlanValue.h:421

llvm::VPDef::definedValues
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition: VPlanValue.h:416

llvm::VPDef::getVPSingleValue
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:394

llvm::VPDef::getVPValue
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:406

llvm::VPDef::getVPDefID
unsigned getVPDefID() const
Definition: VPlanValue.h:426

llvm::VPDerivedIVRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:1875

llvm::VPDerivedIVRecipe::getStepValue
VPValue * getStepValue() const
Definition: VPlan.h:3130

llvm::VPDerivedIVRecipe::getStartValue
VPValue * getStartValue() const
Definition: VPlan.h:3129

llvm::VPEVLBasedIVPHIRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:3595

llvm::VPExpandSCEVRecipe::execute
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
Definition: VPlanRecipes.cpp:3326

llvm::VPExpandSCEVRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:3349

llvm::VPHeaderPHIRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
Definition: VPlanRecipes.cpp:1690

llvm::VPHeaderPHIRecipe::getStartValue
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1729

llvm::VPHistogramRecipe::execute
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
Definition: VPlanRecipes.cpp:1180

llvm::VPHistogramRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
Definition: VPlanRecipes.cpp:1210

llvm::VPHistogramRecipe::getMask
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition: VPlan.h:1471

llvm::VPHistogramRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:1247

llvm::VPIRInstruction::getInstruction
Instruction & getInstruction() const
Definition: VPlan.h:1060

llvm::VPIRInstruction::execute
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition: VPlanRecipes.cpp:918

llvm::VPIRInstruction::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
Definition: VPlanRecipes.cpp:947

llvm::VPIRInstruction::extractLastLaneOfOperand
void extractLastLaneOfOperand(VPBuilder &Builder)
Update the recipes single operand to the last lane of the operand using Builder.
Definition: VPlanRecipes.cpp:954

llvm::VPIRInstruction::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:971

llvm::VPInstruction::BranchOnCond
@ BranchOnCond
Definition: VPlan.h:868

llvm::VPInstruction::PtrAdd
@ PtrAdd
Definition: VPlan.h:878

llvm::VPInstruction::ResumePhi
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:863

llvm::VPInstruction::BranchOnCount
@ BranchOnCount
Definition: VPlan.h:867

llvm::VPInstruction::ExtractFirstActive
@ ExtractFirstActive
Definition: VPlan.h:884

llvm::VPInstruction::ActiveLaneMask
@ ActiveLaneMask
Definition: VPlan.h:857

llvm::VPInstruction::FirstOrderRecurrenceSplice
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:851

llvm::VPInstruction::ExplicitVectorLength
@ ExplicitVectorLength
Definition: VPlan.h:858

llvm::VPInstruction::SLPStore
@ SLPStore
Definition: VPlan.h:856

llvm::VPInstruction::LogicalAnd
@ LogicalAnd
Definition: VPlan.h:874

llvm::VPInstruction::CanonicalIVIncrementForPart
@ CanonicalIVIncrementForPart
Definition: VPlan.h:866

llvm::VPInstruction::SLPLoad
@ SLPLoad
Definition: VPlan.h:855

llvm::VPInstruction::ExtractFromEnd
@ ExtractFromEnd
Definition: VPlan.h:873

llvm::VPInstruction::ComputeReductionResult
@ ComputeReductionResult
Definition: VPlan.h:869

llvm::VPInstruction::Not
@ Not
Definition: VPlan.h:854

llvm::VPInstruction::CalculateTripCountMinusVF
@ CalculateTripCountMinusVF
Definition: VPlan.h:864

llvm::VPInstruction::AnyOf
@ AnyOf
Definition: VPlan.h:881

llvm::VPInstruction::hasResult
bool hasResult() const
Definition: VPlan.h:989

llvm::VPInstruction::opcodeMayReadOrWriteFromMemory
bool opcodeMayReadOrWriteFromMemory() const
Returns true if the underlying opcode may read from or write to memory.
Definition: VPlanRecipes.cpp:771

llvm::VPInstruction::dump
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
Definition: VPlanRecipes.cpp:838

llvm::VPInstruction::getOpcode
unsigned getOpcode() const
Definition: VPlan.h:966

llvm::VPInstruction::onlyFirstPartUsed
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlanRecipes.cpp:818

llvm::VPInstruction::isVectorToScalar
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
Definition: VPlanRecipes.cpp:714

llvm::VPInstruction::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
Definition: VPlanRecipes.cpp:843

llvm::VPInstruction::onlyFirstLaneUsed
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlanRecipes.cpp:792

llvm::VPInstruction::isSingleScalar
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
Definition: VPlanRecipes.cpp:721

llvm::VPInstruction::execute
void execute(VPTransformState &State) override
Generate the instruction.
Definition: VPlanRecipes.cpp:736

llvm::VPInterleaveRecipe::getAddr
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2266

llvm::VPInterleaveRecipe::getMask
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2272

llvm::VPInterleaveRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:3139

llvm::VPInterleaveRecipe::execute
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
Definition: VPlanRecipes.cpp:2927

llvm::VPInterleaveRecipe::getStoredValues
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2279

llvm::VPInterleaveRecipe::getInsertPos
Instruction * getInsertPos() const
Definition: VPlan.h:2314

llvm::VPInterleaveRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInterleaveRecipe.
Definition: VPlanRecipes.cpp:3169

llvm::VPInterleaveRecipe::getNumStoreOperands
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2303

llvm::VPIntrinsic::isVPIntrinsic
static bool isVPIntrinsic(Intrinsic::ID)
Definition: IntrinsicInst.cpp:488

llvm::VPLane
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlanHelpers.h:116

llvm::VPLane::getLastLaneForVF
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlanHelpers.h:157

llvm::VPLane::getLaneFromEnd
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlanHelpers.h:143

llvm::VPLane::getFirstLane
static VPLane getFirstLane()
Definition: VPlanHelpers.h:141

llvm::VPPartialReductionRecipe::execute
void execute(VPTransformState &State) override
Generate the reduction in the loop.
Definition: VPlanRecipes.cpp:319

llvm::VPPartialReductionRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:340

llvm::VPPartialReductionRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
Definition: VPlanRecipes.cpp:284

llvm::VPPartialReductionRecipe::getOpcode
unsigned getOpcode() const
Get the binary op's opcode.
Definition: VPlan.h:2147

llvm::VPPredInstPHIRecipe::execute
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
Definition: VPlanRecipes.cpp:2486

llvm::VPPredInstPHIRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:2536

llvm::VPRecipeBase
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:366

llvm::VPRecipeBase::mayReadFromMemory
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
Definition: VPlanRecipes.cpp:102

llvm::VPRecipeBase::mayHaveSideEffects
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
Definition: VPlanRecipes.cpp:148

llvm::VPRecipeBase::mayWriteToMemory
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
Definition: VPlanRecipes.cpp:54

llvm::VPRecipeBase::computeCost
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
Definition: VPlanRecipes.cpp:278

llvm::VPRecipeBase::getParent
VPBasicBlock * getParent()
Definition: VPlan.h:391

llvm::VPRecipeBase::getDebugLoc
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:460

llvm::VPRecipeBase::moveBefore
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
Definition: VPlanRecipes.cpp:242

llvm::VPRecipeBase::insertBefore
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
Definition: VPlanRecipes.cpp:205

llvm::VPRecipeBase::insertAfter
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
Definition: VPlanRecipes.cpp:219

llvm::VPRecipeBase::eraseFromParent
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: VPlanRecipes.cpp:232

llvm::VPRecipeBase::cost
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
Definition: VPlanRecipes.cpp:248

llvm::VPRecipeBase::removeFromParent
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: VPlanRecipes.cpp:226

llvm::VPRecipeBase::moveAfter
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Definition: VPlanRecipes.cpp:237

llvm::VPRecipeWithIRFlags
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:577

llvm::VPRecipeWithIRFlags::ExactFlags
ExactFlagsTy ExactFlags
Definition: VPlan.h:627

llvm::VPRecipeWithIRFlags::FMFs
FastMathFlagsTy FMFs
Definition: VPlan.h:630

llvm::VPRecipeWithIRFlags::NonNegFlags
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:629

llvm::VPRecipeWithIRFlags::getGEPNoWrapFlags
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition: VPlan.h:798

llvm::VPRecipeWithIRFlags::setFlags
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:759

llvm::VPRecipeWithIRFlags::hasFastMathFlags
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:801

llvm::VPRecipeWithIRFlags::DisjointFlags
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:626

llvm::VPRecipeWithIRFlags::GEPFlags
GEPNoWrapFlags GEPFlags
Definition: VPlan.h:628

llvm::VPRecipeWithIRFlags::WrapFlags
WrapFlagsTy WrapFlags
Definition: VPlan.h:625

llvm::VPRecipeWithIRFlags::hasNoUnsignedWrap
bool hasNoUnsignedWrap() const
Definition: VPlan.h:805

llvm::VPRecipeWithIRFlags::printFlags
void printFlags(raw_ostream &O) const
Definition: VPlanRecipes.cpp:1352

llvm::VPRecipeWithIRFlags::getPredicate
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:792

llvm::VPRecipeWithIRFlags::hasNoSignedWrap
bool hasNoSignedWrap() const
Definition: VPlan.h:811

llvm::VPRecipeWithIRFlags::getFastMathFlags
FastMathFlags getFastMathFlags() const
Definition: VPlanRecipes.cpp:349

llvm::VPReductionEVLRecipe::execute
void execute(VPTransformState &State) override
Generate the reduction in the loop.
Definition: VPlanRecipes.cpp:2257

llvm::VPReductionEVLRecipe::getEVL
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2427

llvm::VPReductionEVLRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:2350

llvm::VPReductionPHIRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:3527

llvm::VPReductionPHIRecipe::execute
void execute(VPTransformState &State) override
Generate the phi/select nodes.
Definition: VPlanRecipes.cpp:3442

llvm::VPReductionRecipe::isConditional
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2385

llvm::VPReductionRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
Definition: VPlanRecipes.cpp:2295

llvm::VPReductionRecipe::getVecOp
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2389

llvm::VPReductionRecipe::getRecurrenceDescriptor
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Return the recurrence decriptor for the in-loop reduction.
Definition: VPlan.h:2379

llvm::VPReductionRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:2329

llvm::VPReductionRecipe::getCondOp
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2391

llvm::VPReductionRecipe::isOrdered
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2383

llvm::VPReductionRecipe::getChainOp
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2387

llvm::VPReductionRecipe::execute
void execute(VPTransformState &State) override
Generate the reduction in the loop.
Definition: VPlanRecipes.cpp:2207

llvm::VPRegionBlock
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3377

llvm::VPRegionBlock::getEntry
const VPBlockBase * getEntry() const
Definition: VPlan.h:3413

llvm::VPReplicateRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:2397

llvm::VPReplicateRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
Definition: VPlanRecipes.cpp:2387

llvm::VPReplicateRecipe::getOpcode
unsigned getOpcode() const
Definition: VPlan.h:2514

llvm::VPReplicateRecipe::shouldPack
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
Definition: VPlanRecipes.cpp:2375

llvm::VPReverseVectorPointerRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:2096

llvm::VPReverseVectorPointerRecipe::execute
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition: VPlanRecipes.cpp:2070

llvm::VPReverseVectorPointerRecipe::getVFValue
VPValue * getVFValue()
Definition: VPlan.h:1581

llvm::VPScalarIVStepsRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:1973

llvm::VPScalarIVStepsRecipe::getStepValue
VPValue * getStepValue() const
Definition: VPlan.h:3187

llvm::VPScalarIVStepsRecipe::execute
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
Definition: VPlanRecipes.cpp:1888

llvm::VPScalarPHIRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:3615

llvm::VPScalarPHIRecipe::execute
void execute(VPTransformState &State) override
Generate the phi/select nodes.
Definition: VPlanRecipes.cpp:3605

llvm::VPSingleDefRecipe::getUnderlyingInstr
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:563

llvm::VPSingleDefRecipe::dump
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
Definition: VPlanRecipes.cpp:364

llvm::VPSlotTracker
This class can be used to assign names to VPValues.
Definition: VPlanHelpers.h:389

llvm::VPTypeAnalysis::getContext
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Definition: VPlanAnalysis.h:65

llvm::VPTypeAnalysis::inferScalarType
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
Definition: VPlanAnalysis.cpp:216

llvm::VPUnrollPartAccessor::getUnrollPartOperand
VPValue * getUnrollPartOperand(VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
Definition: VPlanRecipes.cpp:369

llvm::VPUnrollPartAccessor::getUnrollPart
unsigned getUnrollPart(VPUser &U) const
Return the unroll part.
Definition: VPlanRecipes.cpp:376

llvm::VPUser
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:206

llvm::VPUser::printOperands
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1474

llvm::VPUser::operands
operand_range operands()
Definition: VPlanValue.h:263

llvm::VPUser::setOperand
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:248

llvm::VPUser::getNumOperands
unsigned getNumOperands() const
Definition: VPlanValue.h:242

llvm::VPUser::op_begin
operand_iterator op_begin()
Definition: VPlanValue.h:259

llvm::VPUser::getOperand
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:243

llvm::VPUser::onlyFirstLaneUsed
virtual bool onlyFirstLaneUsed(const VPValue *Op) const
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlanValue.h:278

llvm::VPValue
Definition: VPlanValue.h:46

llvm::VPValue::isDefinedOutsideLoopRegions
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop region.
Definition: VPlan.cpp:1435

llvm::VPValue::getDefiningRecipe
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:125

llvm::VPValue::printAsOperand
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1470

llvm::VPValue::VPInstruction
friend class VPInstruction
Definition: VPlanValue.h:50

llvm::VPValue::hasMoreThanOneUniqueUser
bool hasMoreThanOneUniqueUser() const
Returns true if the value has more than one unique user.
Definition: VPlanValue.h:144

llvm::VPValue::getUnderlyingValue
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:89

llvm::VPValue::user_begin
user_iterator user_begin()
Definition: VPlanValue.h:134

llvm::VPValue::getNumUsers
unsigned getNumUsers() const
Definition: VPlanValue.h:117

llvm::VPValue::getLiveInIRValue
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:178

llvm::VPValue::isLiveIn
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:173

llvm::VPValue::users
user_range users()
Definition: VPlanValue.h:138

llvm::VPVectorPointerRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:2122

llvm::VPVectorPointerRecipe::execute
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition: VPlanRecipes.cpp:2106

llvm::VPWidenCallRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:1030

llvm::VPWidenCallRecipe::getCalledScalarFunction
Function * getCalledScalarFunction() const
Definition: VPlan.h:1419

llvm::VPWidenCallRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
Definition: VPlanRecipes.cpp:1022

llvm::VPWidenCallRecipe::execute
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
Definition: VPlanRecipes.cpp:988

llvm::VPWidenCallRecipe::arg_operands
operand_range arg_operands()
Definition: VPlan.h:1423

llvm::VPWidenCanonicalIVRecipe::execute
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
Definition: VPlanRecipes.cpp:3357

llvm::VPWidenCanonicalIVRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:3376

llvm::VPWidenCastRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:1679

llvm::VPWidenCastRecipe::getResultType
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1242

llvm::VPWidenCastRecipe::execute
void execute(VPTransformState &State) override
Produce widened copies of the cast.
Definition: VPlanRecipes.cpp:1609

llvm::VPWidenCastRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
Definition: VPlanRecipes.cpp:1624

llvm::VPWidenEVLRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe.
Definition: VPlanRecipes.cpp:1599

llvm::VPWidenEVLRecipe::execute
void execute(VPTransformState &State) override final
Produce a vp-intrinsic using the opcode and operands of the recipe, processing EVL elements.
Definition: VPlanRecipes.cpp:1553

llvm::VPWidenEVLRecipe::getEVL
VPValue * getEVL()
Definition: VPlan.h:1170

llvm::VPWidenGEPRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:2045

llvm::VPWidenGEPRecipe::execute
void execute(VPTransformState &State) override
Generate the gep nodes.
Definition: VPlanRecipes.cpp:1982

llvm::VPWidenInductionRecipe::getPHINode
PHINode * getPHINode() const
Definition: VPlan.h:1785

llvm::VPWidenInductionRecipe::getStepValue
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1782

llvm::VPWidenInductionRecipe::getInductionDescriptor
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1788

llvm::VPWidenIntOrFpInductionRecipe::getTruncInst
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1860

llvm::VPWidenIntOrFpInductionRecipe::execute
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
Definition: VPlanRecipes.cpp:1750

llvm::VPWidenIntOrFpInductionRecipe::getVFValue
VPValue * getVFValue()
Definition: VPlan.h:1849

llvm::VPWidenIntOrFpInductionRecipe::getScalarType
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1869

llvm::VPWidenIntOrFpInductionRecipe::isCanonical
bool isCanonical() const
Returns true if the induction is canonical, i.e.
Definition: VPlanRecipes.cpp:1861

llvm::VPWidenIntOrFpInductionRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:1849

llvm::VPWidenIntOrFpInductionRecipe::getSplatVFValue
VPValue * getSplatVFValue()
Definition: VPlan.h:1852

llvm::VPWidenIntrinsicRecipe::onlyFirstLaneUsed
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlanRecipes.cpp:1150

llvm::VPWidenIntrinsicRecipe::getIntrinsicName
StringRef getIntrinsicName() const
Return to name of the intrinsic as string.
Definition: VPlanRecipes.cpp:1146

llvm::VPWidenIntrinsicRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:1159

llvm::VPWidenIntrinsicRecipe::getResultType
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition: VPlan.h:1362

llvm::VPWidenIntrinsicRecipe::execute
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
Definition: VPlanRecipes.cpp:1057

llvm::VPWidenIntrinsicRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
Definition: VPlanRecipes.cpp:1102

llvm::VPWidenMemoryRecipe::IsMasked
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2625

llvm::VPWidenMemoryRecipe::Reverse
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2622

llvm::VPWidenMemoryRecipe::isConsecutive
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2661

llvm::VPWidenMemoryRecipe::Ingredient
Instruction & Ingredient
Definition: VPlan.h:2616

llvm::VPWidenMemoryRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
Definition: VPlanRecipes.cpp:2545

llvm::VPWidenMemoryRecipe::Consecutive
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2619

llvm::VPWidenMemoryRecipe::getMask
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2675

llvm::VPWidenMemoryRecipe::getAddr
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2668

llvm::VPWidenMemoryRecipe::isReverse
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2665

llvm::VPWidenPHIRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:3551

llvm::VPWidenPHIRecipe::execute
void execute(VPTransformState &State) override
Generate the phi/select nodes.
Definition: VPlanRecipes.cpp:3539

llvm::VPWidenPointerInductionRecipe::onlyScalarsGenerated
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
Definition: VPlanRecipes.cpp:3219

llvm::VPWidenPointerInductionRecipe::getFirstUnrolledPartOperand
VPValue * getFirstUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the first unrolled part,...
Definition: VPlan.h:1914

llvm::VPWidenPointerInductionRecipe::execute
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition: VPlanRecipes.cpp:3224

llvm::VPWidenPointerInductionRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:3307

llvm::VPWidenRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
Definition: VPlanRecipes.cpp:1480

llvm::VPWidenRecipe::execute
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
Definition: VPlanRecipes.cpp:1394

llvm::VPWidenRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:1590

llvm::VPWidenRecipe::getOpcode
unsigned getOpcode() const
Definition: VPlan.h:1136

llvm::VPlan::getUF
unsigned getUF() const
Definition: VPlan.h:3685

llvm::Value
LLVM Value Representation.
Definition: Value.h:74

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255

llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377

llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694

llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1094

llvm::Value::hasName
bool hasName() const
Definition: Value.h:261

llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309

llvm::VectorBuilder
Definition: VectorBuilder.h:25

llvm::VectorBuilder::setEVL
VectorBuilder & setEVL(Value *NewExplicitVectorLength)
Definition: VectorBuilder.h:82

llvm::VectorBuilder::setMask
VectorBuilder & setMask(Value *NewMask)
Definition: VectorBuilder.h:78

llvm::VectorBuilder::createVectorInstruction
Value * createVectorInstruction(unsigned Opcode, Type *ReturnTy, ArrayRef< Value * > VecOpArray, const Twine &Name=Twine())
Definition: VectorBuilder.cpp:54

llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427

llvm::VectorType::getElementCount
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:665

llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.

llvm::VectorType::getDoubleElementsVectorType
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same el...
Definition: DerivedTypes.h:541

llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:460

llvm::cl::opt
Definition: CommandLine.h:1423

llvm::details::FixedOrScalableQuantity::isScalable
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171

llvm::details::FixedOrScalableQuantity::getKnownMinValue
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168

llvm::details::FixedOrScalableQuantity::divideCoefficientBy
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254

llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition: ilist_node.h:32

llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:132

llvm::iplist_impl< simple_ilist< T, Options... >, ilist_traits< T > >::iterator
base_list_type::iterator iterator
Definition: ilist.h:121

llvm::iplist_impl::erase
iterator erase(iterator where)
Definition: ilist.h:204

llvm::iplist_impl::remove
pointer remove(iterator &IT)
Definition: ilist.h:188

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52

unsigned

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143

llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34

llvm::Intrinsic::getOrInsertDeclaration
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:732

llvm::Intrinsic::getBaseName
StringRef getBaseName(ID id)
Return the LLVM name for an intrinsic, without encoded types for overloading, such as "llvm....
Definition: Intrinsics.cpp:42

llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:163

llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49

llvm::PatternMatch::m_LogicalOr
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
Definition: PatternMatch.h:3099

llvm::PatternMatch::m_LogicalAnd
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
Definition: PatternMatch.h:3081

llvm::VPlanPatternMatch
Definition: VPlanPatternMatch.h:26

llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp

llvm::logicalview::LVAttributeKind::Zero
@ Zero

llvm::vputils::isUniformAfterVectorization
bool isUniformAfterVectorization(const VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlanUtils.h:41

llvm::vputils::onlyFirstPartUsed
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlanUtils.cpp:21

llvm::vputils::onlyFirstLaneUsed
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlanUtils.cpp:16

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::ReplaceInstWithInst
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
Definition: BasicBlockUtils.cpp:723

llvm::createSimpleReduction
Value * createSimpleReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a reduction of the given vector.
Definition: LoopUtils.cpp:1278

llvm::Offset
@ Offset
Definition: DWP.cpp:480

llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739

llvm::getLoadStoreAddressSpace
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
Definition: Instructions.h:5030

llvm::getMinMaxReductionIntrinsicOp
Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition: LoopUtils.cpp:989

llvm::enumerate
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448

llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:4984

llvm::getRuntimeVF
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
Definition: LoopVectorize.cpp:894

llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:77

llvm::interleaveComma
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2207

llvm::concatenateVectors
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Definition: VectorUtils.cpp:1095

llvm::getLoadStoreAlignment
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
Definition: Instructions.h:5010

llvm::createMinMaxOp
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1076

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746

llvm::createBitMaskForGaps
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
Definition: VectorUtils.cpp:992

llvm::createStrideMask
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
Definition: VectorUtils.cpp:1032

llvm::EnableVPlanNativePath
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:54

llvm::createReplicatedMask
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
Definition: VectorUtils.cpp:1012

llvm::ComplexDeinterleavingOperation::Splat
@ Splat

llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163

llvm::isPointerTy
bool isPointerTy(const Type *T)
Definition: SPIRVUtils.h:256

llvm::createOrderedReduction
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1341

llvm::createReduction
Value * createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic reduction using a recurrence descriptor Desc Fast-math-flags are propagated using th...
Definition: LoopUtils.cpp:1323

llvm::CaptureComponents::Address
@ Address

llvm::createInterleaveMask
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
Definition: VectorUtils.cpp:1021

llvm::RecurKind
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:33

llvm::RecurKind::Mul
@ Mul
Product of integers.

llvm::RecurKind::SMax
@ SMax
Signed integer max implemented in terms of select(cmp()).

llvm::RecurKind::Add
@ Add
Sum of integers.

llvm::isVectorIntrinsicWithScalarOpAtArg
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:134

llvm::getRecurrenceIdentity
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about an recurrence kind, return the identity for the @llvm.vector....
Definition: LoopUtils.cpp:1270

llvm::Op
DWARFExpression::Operation Op
Definition: DWARFExpression.cpp:22

llvm::createStepForVF
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
Definition: LoopVectorize.cpp:887

llvm::predecessors
auto predecessors(const MachineBasicBlock *BB)
Definition: MachineBasicBlock.h:1377

llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903

llvm::getLoadStoreType
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
Definition: Instructions.h:5039

llvm::Cost
InstructionCost Cost
Definition: FunctionSpecialization.h:102

llvm::toVectorTy
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
Definition: VectorTypeUtils.h:19

llvm::isVectorIntrinsicWithOverloadTypeAtArg
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
Definition: VectorUtils.cpp:162

raw_ostream.h

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39

llvm::TargetTransformInfo::OperandValueInfo
Definition: TargetTransformInfo.h:1135

llvm::TargetTransformInfo::OperandValueInfo::Kind
OperandValueKind Kind
Definition: TargetTransformInfo.h:1136

llvm::VPCostContext
Struct to hold various analysis needed for cost computations.
Definition: VPlanHelpers.h:356

llvm::VPCostContext::LLVMCtx
LLVMContext & LLVMCtx
Definition: VPlanHelpers.h:360

llvm::VPCostContext::getOperandInfo
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition: VPlan.cpp:1634

llvm::VPCostContext::skipCostComputation
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed, e.g.
Definition: LoopVectorize.cpp:7202

llvm::VPCostContext::getLegacyCost
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of al...
Definition: LoopVectorize.cpp:7195

llvm::VPCostContext::CostKind
TargetTransformInfo::TargetCostKind CostKind
Definition: VPlanHelpers.h:363

llvm::VPCostContext::Types
VPTypeAnalysis Types
Definition: VPlanHelpers.h:359

llvm::VPCostContext::TLI
const TargetLibraryInfo & TLI
Definition: VPlanHelpers.h:358

llvm::VPCostContext::TTI
const TargetTransformInfo & TTI
Definition: VPlanHelpers.h:357

llvm::VPCostContext::SkipCostComputation
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlanHelpers.h:362

llvm::VPFirstOrderRecurrencePHIRecipe::execute
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition: VPlanRecipes.cpp:3385

llvm::VPFirstOrderRecurrencePHIRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
Definition: VPlanRecipes.cpp:3414

llvm::VPFirstOrderRecurrencePHIRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:3433

llvm::VPRecipeWithIRFlags::DisjointFlagsTy::IsDisjoint
char IsDisjoint
Definition: VPlan.h:598

llvm::VPRecipeWithIRFlags::WrapFlagsTy::HasNUW
char HasNUW
Definition: VPlan.h:591

llvm::VPRecipeWithIRFlags::WrapFlagsTy::HasNSW
char HasNSW
Definition: VPlan.h:592

llvm::VPTransformState::CFGState::PrevBB
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlanHelpers.h:304

llvm::VPTransformState::CFGState::VPBB2IRBB
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlanHelpers.h:312

llvm::VPTransformState::CFGState::getPreheaderBBFor
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:349

llvm::VPTransformState
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlanHelpers.h:196

llvm::VPTransformState::hasScalarValue
bool hasScalarValue(VPValue *Def, VPLane Lane)
Definition: VPlanHelpers.h:229

llvm::VPTransformState::hasVectorValue
bool hasVectorValue(VPValue *Def)
Definition: VPlanHelpers.h:227

llvm::VPTransformState::ExpandedSCEVs
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlanHelpers.h:349

llvm::VPTransformState::TypeAnalysis
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlanHelpers.h:352

llvm::VPTransformState::addMetadata
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:362

llvm::VPTransformState::get
Value * get(VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition: VPlan.cpp:251

llvm::VPTransformState::CFG
struct llvm::VPTransformState::CFGState CFG

llvm::VPTransformState::Lane
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
Definition: VPlanHelpers.h:210

llvm::VPTransformState::Builder
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlanHelpers.h:329

llvm::VPTransformState::TTI
const TargetTransformInfo * TTI
Target Transform Info.
Definition: VPlanHelpers.h:202

llvm::VPTransformState::reset
void reset(VPValue *Def, Value *V)
Reset an existing vector value for Def and a given Part.
Definition: VPlanHelpers.h:250

llvm::VPTransformState::VF
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Definition: VPlanHelpers.h:205

llvm::VPTransformState::setDebugLocFrom
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:373

llvm::VPTransformState::CurrentParentLoop
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
Definition: VPlanHelpers.h:338

llvm::VPTransformState::set
void set(VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
Definition: VPlanHelpers.h:239

llvm::VPWidenLoadEVLRecipe::execute
void execute(VPTransformState &State) override
Generate the wide load or gather.
Definition: VPlanRecipes.cpp:2643

llvm::VPWidenLoadEVLRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
Definition: VPlanRecipes.cpp:2684

llvm::VPWidenLoadEVLRecipe::getEVL
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:2745

llvm::VPWidenLoadEVLRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:2710

llvm::VPWidenLoadRecipe::execute
void execute(VPTransformState &State) override
Generate a wide load or gather.
Definition: VPlanRecipes.cpp:2584

llvm::VPWidenLoadRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:2623

llvm::VPWidenSelectRecipe::isInvariantCond
bool isInvariantCond() const
Definition: VPlan.h:1514

llvm::VPWidenSelectRecipe::getCond
VPValue * getCond() const
Definition: VPlan.h:1510

llvm::VPWidenSelectRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:1266

llvm::VPWidenSelectRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenSelectRecipe.
Definition: VPlanRecipes.cpp:1301

llvm::VPWidenSelectRecipe::execute
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
Definition: VPlanRecipes.cpp:1281

llvm::VPWidenStoreEVLRecipe::getStoredValue
VPValue * getStoredValue() const
Return the address accessed by this recipe.
Definition: VPlan.h:2824

llvm::VPWidenStoreEVLRecipe::execute
void execute(VPTransformState &State) override
Generate the wide store or scatter.
Definition: VPlanRecipes.cpp:2765

llvm::VPWidenStoreEVLRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:2831

llvm::VPWidenStoreEVLRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
Definition: VPlanRecipes.cpp:2805

llvm::VPWidenStoreEVLRecipe::getEVL
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:2827

llvm::VPWidenStoreRecipe::execute
void execute(VPTransformState &State) override
Generate a wide store or scatter.
Definition: VPlanRecipes.cpp:2719

llvm::VPWidenStoreRecipe::getStoredValue
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:2789

llvm::VPWidenStoreRecipe::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlanRecipes.cpp:2758

llvm::VPlanIngredient
Definition: VPlan.h:3804