1//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This transformation analyzes and transforms the induction variables (and
10// computations derived from them) into forms suitable for efficient execution
11// on the target.
12//
13// This pass performs a strength reduction on array references inside loops
14// that have the loop induction variable as one or more of their components.
15// It rewrites expressions to take advantage of scaled-index addressing modes
16// available on the target, and it performs a variety of other optimizations
17// related to loop induction variables.
18//
19// Terminology note: this code has a lot of handling for "post-increment" or
20// "post-inc" users. This is not talking about post-increment addressing modes;
21// it is instead talking about code like this:
22//
23// %i = phi [ 0, %entry ], [ %i.next, %latch ]
24// ...
25// %i.next = add %i, 1
26// %c = icmp eq %i.next, %n
27//
28// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
29// it's useful to think about these as the same register, with some uses using
30// the value of the register before the add and some using it after. In this
31// example, the icmp is a post-increment user, since it uses %i.next, which is
32// the value of the induction variable after the increment. The other common
33// case of post-increment users is users outside the loop.
34//
35// TODO: More sophistication in the way Formulae are generated and filtered.
36//
37// TODO: Handle multiple loops at a time.
38//
39// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
40// of a GlobalValue?
41//
42// TODO: When truncation is free, truncate ICmp users' operands to make it a
43// smaller encoding (on x86 at least).
44//
45// TODO: When a negated register is used by an add (such as in a list of
46// multiple base registers, or as the increment expression in an addrec),
47// we may not actually need both reg and (-1 * reg) in registers; the
48// negation can be implemented by using a sub instead of an add. The
49// lack of support for taking this into consideration when making
50// register pressure decisions is partly worked around by the "Special"
51// use kind.
52//
53//===----------------------------------------------------------------------===//
54
56#include "llvm/ADT/APInt.h"
57#include "llvm/ADT/DenseMap.h"
58#include "llvm/ADT/DenseSet.h"
60#include "llvm/ADT/STLExtras.h"
61#include "llvm/ADT/SetVector.h"
64#include "llvm/ADT/SmallSet.h"
66#include "llvm/ADT/Statistic.h"
84#include "llvm/IR/BasicBlock.h"
85#include "llvm/IR/Constant.h"
86#include "llvm/IR/Constants.h"
89#include "llvm/IR/Dominators.h"
90#include "llvm/IR/GlobalValue.h"
91#include "llvm/IR/IRBuilder.h"
92#include "llvm/IR/InstrTypes.h"
93#include "llvm/IR/Instruction.h"
96#include "llvm/IR/Module.h"
97#include "llvm/IR/Operator.h"
98#include "llvm/IR/Type.h"
99#include "llvm/IR/Use.h"
100#include "llvm/IR/User.h"
101#include "llvm/IR/Value.h"
102#include "llvm/IR/ValueHandle.h"
104#include "llvm/Pass.h"
105#include "llvm/Support/Casting.h"
108#include "llvm/Support/Debug.h"
118#include <algorithm>
119#include <cassert>
120#include <cstddef>
121#include <cstdint>
122#include <iterator>
123#include <limits>
124#include <map>
125#include <numeric>
126#include <optional>
127#include <utility>
128
129using namespace llvm;
130using namespace SCEVPatternMatch;
131
132#define DEBUG_TYPE "loop-reduce"
133
134/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
135/// bail out. This threshold is far beyond the number of users that LSR can
136/// conceivably solve, so it should not affect generated code, but catches the
137/// worst cases before LSR burns too much compile time and stack space.
138static const unsigned MaxIVUsers = 200;
139
140/// Limit the size of expression that SCEV-based salvaging will attempt to
141/// translate into a DIExpression.
142/// Choose a maximum size such that debuginfo is not excessively increased and
143/// the salvaging is not too expensive for the compiler.
144static const unsigned MaxSCEVSalvageExpressionSize = 64;
145
146// Cleanup congruent phis after LSR phi expansion.
147static cl::opt<bool> EnablePhiElim(
148 "enable-lsr-phielim", cl::Hidden, cl::init(true),
149 cl::desc("Enable LSR phi elimination"));
150
151// The flag adds instruction count to solutions cost comparison.
152static cl::opt<bool> InsnsCost(
153 "lsr-insns-cost", cl::Hidden, cl::init(true),
154 cl::desc("Add instruction count to a LSR cost model"));
155
156// Flag to choose how to narrow complex lsr solution
157static cl::opt<bool> LSRExpNarrow(
158 "lsr-exp-narrow", cl::Hidden, cl::init(false),
159 cl::desc("Narrow LSR complex solution using"
160 " expectation of registers number"));
161
162// Flag to narrow search space by filtering non-optimal formulae with
163// the same ScaledReg and Scale.
164static cl::opt<bool> FilterSameScaledReg(
165 "lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true),
166 cl::desc("Narrow LSR search space by filtering non-optimal formulae"
167 " with the same ScaledReg and Scale"));
168
169static cl::opt<TTI::AddressingModeKind> PreferredAddresingMode(
170 "lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None),
171 cl::desc("A flag that overrides the target's preferred addressing mode."),
172 cl::values(clEnumValN(TTI::AMK_None,
173 "none",
174 "Don't prefer any addressing mode"),
175 clEnumValN(TTI::AMK_PreIndexed,
176 "preindexed",
177 "Prefer pre-indexed addressing mode"),
178 clEnumValN(TTI::AMK_PostIndexed,
179 "postindexed",
180 "Prefer post-indexed addressing mode")));
181
182static cl::opt<unsigned> ComplexityLimit(
183 "lsr-complexity-limit", cl::Hidden,
184 cl::init(std::numeric_limits<uint16_t>::max()),
185 cl::desc("LSR search space complexity limit"));
186
187static cl::opt<unsigned> SetupCostDepthLimit(
188 "lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),
189 cl::desc("The limit on recursion depth for LSRs setup cost"));
190
191static cl::opt<bool> AllowDropSolutionIfLessProfitable(
192 "lsr-drop-solution", cl::Hidden,
193 cl::desc("Attempt to drop solution if it is less profitable"));
194
195static cl::opt<bool> EnableVScaleImmediates(
196 "lsr-enable-vscale-immediates", cl::Hidden, cl::init(true),
197 cl::desc("Enable analysis of vscale-relative immediates in LSR"));
198
199static cl::opt<bool> DropScaledForVScale(
200 "lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true),
201 cl::desc("Avoid using scaled registers with vscale-relative addressing"));
202
203#ifndef NDEBUG
204// Stress test IV chain generation.
205static cl::opt<bool> StressIVChain(
206 "stress-ivchain", cl::Hidden, cl::init(false),
207 cl::desc("Stress test LSR IV chains"));
208#else
209static bool StressIVChain = false;
210#endif
211
212namespace {
213
214struct MemAccessTy {
215 /// Used in situations where the accessed memory type is unknown.
216 static const unsigned UnknownAddressSpace =
217 std::numeric_limits<unsigned>::max();
218
219 Type *MemTy = nullptr;
220 unsigned AddrSpace = UnknownAddressSpace;
221
222 MemAccessTy() = default;
223 MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
224
225 bool operator==(MemAccessTy Other) const {
226 return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
227 }
228
229 bool operator!=(MemAccessTy Other) const { return !(*this == Other); }
230
231 static MemAccessTy getUnknown(LLVMContext &Ctx,
232 unsigned AS = UnknownAddressSpace) {
233 return MemAccessTy(Type::getVoidTy(Ctx), AS);
234 }
235
236 Type *getType() { return MemTy; }
237};
238
239/// This class holds data which is used to order reuse candidates.
240class RegSortData {
241public:
242 /// This represents the set of LSRUse indices which reference
243 /// a particular register.
244 SmallBitVector UsedByIndices;
245
246 void print(raw_ostream &OS) const;
247 void dump() const;
248};
249
250// An offset from an address that is either scalable or fixed. Used for
251// per-target optimizations of addressing modes.
252class Immediate : public details::FixedOrScalableQuantity<Immediate, int64_t> {
253 constexpr Immediate(ScalarTy MinVal, bool Scalable)
254 : FixedOrScalableQuantity(MinVal, Scalable) {}
255
256 constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
257 : FixedOrScalableQuantity(V) {}
258
259public:
260 constexpr Immediate() = delete;
261
262 static constexpr Immediate getFixed(ScalarTy MinVal) {
263 return {MinVal, false};
264 }
265 static constexpr Immediate getScalable(ScalarTy MinVal) {
266 return {MinVal, true};
267 }
268 static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
269 return {MinVal, Scalable};
270 }
271 static constexpr Immediate getZero() { return {0, false}; }
272 static constexpr Immediate getFixedMin() {
273 return {std::numeric_limits<int64_t>::min(), false};
274 }
275 static constexpr Immediate getFixedMax() {
276 return {std::numeric_limits<int64_t>::max(), false};
277 }
278 static constexpr Immediate getScalableMin() {
279 return {std::numeric_limits<int64_t>::min(), true};
280 }
281 static constexpr Immediate getScalableMax() {
282 return {std::numeric_limits<int64_t>::max(), true};
283 }
284
285 constexpr bool isLessThanZero() const { return Quantity < 0; }
286
287 constexpr bool isGreaterThanZero() const { return Quantity > 0; }
288
289 constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
290 return isZero() || Imm.isZero() || Imm.Scalable == Scalable;
291 }
292
293 constexpr bool isMin() const {
294 return Quantity == std::numeric_limits<ScalarTy>::min();
295 }
296
297 constexpr bool isMax() const {
298 return Quantity == std::numeric_limits<ScalarTy>::max();
299 }
300
301 // Arithmetic 'operators' that cast to unsigned types first.
302 constexpr Immediate addUnsigned(const Immediate &RHS) const {
303 assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
304 ScalarTy Value = (uint64_t)Quantity + RHS.getKnownMinValue();
305 return {Value, Scalable || RHS.isScalable()};
306 }
307
308 constexpr Immediate subUnsigned(const Immediate &RHS) const {
309 assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
310 ScalarTy Value = (uint64_t)Quantity - RHS.getKnownMinValue();
311 return {Value, Scalable || RHS.isScalable()};
312 }
313
314 // Scale the quantity by a constant without caring about runtime scalability.
315 constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
316 ScalarTy Value = (uint64_t)Quantity * RHS;
317 return {Value, Scalable};
318 }
319
320 // Helpers for generating SCEVs with vscale terms where needed.
321 const SCEV *getSCEV(ScalarEvolution &SE, Type *Ty) const {
322 const SCEV *S = SE.getConstant(Ty, Quantity);
323 if (Scalable)
324 S = SE.getMulExpr(S, SE.getVScale(S->getType()));
325 return S;
326 }
327
328 const SCEV *getNegativeSCEV(ScalarEvolution &SE, Type *Ty) const {
329 const SCEV *NegS = SE.getConstant(Ty, -(uint64_t)Quantity);
330 if (Scalable)
331 NegS = SE.getMulExpr(NegS, SE.getVScale(NegS->getType()));
332 return NegS;
333 }
334
335 const SCEV *getUnknownSCEV(ScalarEvolution &SE, Type *Ty) const {
336 const SCEV *SU = SE.getUnknown(ConstantInt::getSigned(Ty, Quantity));
337 if (Scalable)
338 SU = SE.getMulExpr(SU, SE.getVScale(SU->getType()));
339 return SU;
340 }
341};
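// Editorial illustration (not part of the original source): the two flavours
// of Immediate and the compatibility rule they obey; values are arbitrary.
//   Immediate Fixed = Immediate::getFixed(16);     // plain byte offset 16
//   Immediate Scaled = Immediate::getScalable(16); // models 16 * vscale
//   bool OK = Fixed.isCompatibleImmediate(Scaled); // false: neither is zero
//                                                  // and the flavours differ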
342
343// This is needed for the Compare type of std::map when Immediate is used
344// as a key. We don't need it to be fully correct against any value of vscale,
345// just to make sure that vscale-related terms in the map are considered against
346// each other rather than being mixed up and potentially missing opportunities.
347struct KeyOrderTargetImmediate {
348 bool operator()(const Immediate &LHS, const Immediate &RHS) const {
349 if (LHS.isScalable() && !RHS.isScalable())
350 return false;
351 if (!LHS.isScalable() && RHS.isScalable())
352 return true;
353 return LHS.getKnownMinValue() < RHS.getKnownMinValue();
354 }
355};
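// Editorial illustration (not part of the original source): when used as the
// Compare type of a std::map<Immediate, T, KeyOrderTargetImmediate>, all fixed
// offsets sort before all scalable ones, so getFixed(32) orders before
// getScalable(-8) even though -8 < 32 numerically; fixed and vscale-relative
// keys therefore never interleave.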
356
357// This would be nicer if we could be generic instead of directly using size_t,
358// but there doesn't seem to be a type trait for is_orderable or
359// is_lessthan_comparable or similar.
360struct KeyOrderSizeTAndImmediate {
361 bool operator()(const std::pair<size_t, Immediate> &LHS,
362 const std::pair<size_t, Immediate> &RHS) const {
363 size_t LSize = LHS.first;
364 size_t RSize = RHS.first;
365 if (LSize != RSize)
366 return LSize < RSize;
367 return KeyOrderTargetImmediate()(LHS.second, RHS.second);
368 }
369};
370} // end anonymous namespace
371
372#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
373void RegSortData::print(raw_ostream &OS) const {
374 OS << "[NumUses=" << UsedByIndices.count() << ']';
375}
376
377LLVM_DUMP_METHOD void RegSortData::dump() const {
378 print(errs()); errs() << '\n';
379}
380#endif
381
382namespace {
383
384/// Map register candidates to information about how they are used.
385class RegUseTracker {
386 using RegUsesTy = DenseMap<const SCEV *, RegSortData>;
387
388 RegUsesTy RegUsesMap;
389 SmallVector<const SCEV *, 16> RegSequence;
 390
391public:
392 void countRegister(const SCEV *Reg, size_t LUIdx);
393 void dropRegister(const SCEV *Reg, size_t LUIdx);
394 void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);
395
396 bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
397
398 const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
399
400 void clear();
401
402 using iterator = SmallVectorImpl<const SCEV *>::iterator;
403 using const_iterator = SmallVectorImpl<const SCEV *>::const_iterator;
 404
405 iterator begin() { return RegSequence.begin(); }
406 iterator end() { return RegSequence.end(); }
407 const_iterator begin() const { return RegSequence.begin(); }
408 const_iterator end() const { return RegSequence.end(); }
409};
410
411} // end anonymous namespace
412
413void
414RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
415 std::pair<RegUsesTy::iterator, bool> Pair = RegUsesMap.try_emplace(Reg);
416 RegSortData &RSD = Pair.first->second;
417 if (Pair.second)
418 RegSequence.push_back(Reg);
419 RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
420 RSD.UsedByIndices.set(LUIdx);
421}
422
423void
424RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
425 RegUsesTy::iterator It = RegUsesMap.find(Reg);
426 assert(It != RegUsesMap.end());
427 RegSortData &RSD = It->second;
428 assert(RSD.UsedByIndices.size() > LUIdx);
429 RSD.UsedByIndices.reset(LUIdx);
430}
431
432void
433RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
434 assert(LUIdx <= LastLUIdx);
435
436 // Update RegUses. The data structure is not optimized for this purpose;
437 // we must iterate through it and update each of the bit vectors.
438 for (auto &Pair : RegUsesMap) {
439 SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
440 if (LUIdx < UsedByIndices.size())
441 UsedByIndices[LUIdx] =
442 LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
443 UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
444 }
445}
446
447bool
448RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
449 RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
450 if (I == RegUsesMap.end())
451 return false;
452 const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
453 int i = UsedByIndices.find_first();
454 if (i == -1) return false;
455 if ((size_t)i != LUIdx) return true;
456 return UsedByIndices.find_next(i) != -1;
457}
458
459const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
460 RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
461 assert(I != RegUsesMap.end() && "Unknown register!");
462 return I->second.UsedByIndices;
463}
464
465void RegUseTracker::clear() {
466 RegUsesMap.clear();
467 RegSequence.clear();
468}
469
470namespace {
471
472/// This class holds information that describes a formula for computing a
473/// value satisfying a use. It may include broken-out immediates and scaled registers.
474struct Formula {
475 /// Global base address used for complex addressing.
476 GlobalValue *BaseGV = nullptr;
477
478 /// Base offset for complex addressing.
479 Immediate BaseOffset = Immediate::getZero();
480
481 /// Whether any complex addressing has a base register.
482 bool HasBaseReg = false;
483
484 /// The scale of any complex addressing.
485 int64_t Scale = 0;
486
487 /// The list of "base" registers for this use. When this is non-empty, the
488 /// canonical representation of a formula is
489 /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
490 /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
491/// 3. The reg containing the recurrent expr related to the current loop in the
492 /// formula should be put in the ScaledReg.
493 /// #1 enforces that the scaled register is always used when at least two
494 /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
495 /// #2 enforces that 1 * reg is reg.
496 /// #3 ensures invariant regs with respect to current loop can be combined
497 /// together in LSR codegen.
498 /// This invariant can be temporarily broken while building a formula.
499 /// However, every formula inserted into the LSRInstance must be in canonical
500 /// form.
501 SmallVector<const SCEV *, 4> BaseRegs;
 502
503 /// The 'scaled' register for this use. This should be non-null when Scale is
504 /// not zero.
505 const SCEV *ScaledReg = nullptr;
506
507 /// An additional constant offset which is added near the use. This requires a
508 /// temporary register, but the offset itself can live in an add immediate
509 /// field rather than a register.
510 Immediate UnfoldedOffset = Immediate::getZero();
511
512 Formula() = default;
513
514 void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
515
516 bool isCanonical(const Loop &L) const;
517
518 void canonicalize(const Loop &L);
519
520 bool unscale();
521
522 bool hasZeroEnd() const;
523
524 bool countsDownToZero() const;
525
526 size_t getNumRegs() const;
527 Type *getType() const;
528
529 void deleteBaseReg(const SCEV *&S);
530
531 bool referencesReg(const SCEV *S) const;
532 bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
533 const RegUseTracker &RegUses) const;
534
535 void print(raw_ostream &OS) const;
536 void dump() const;
537};
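// Editorial illustration (not part of the original source): for an address
// computed as %base + 4 * {0,+,1}<%L> + 16, a canonical Formula would hold
//   BaseRegs   = { %base }
//   ScaledReg  = {0,+,1}<%L>   // the addrec for the current loop (rule #3)
//   Scale      = 4
//   BaseOffset = 16
// and a plain two-register sum %a + %b is kept as BaseRegs = { %a },
// ScaledReg = %b, Scale = 1, so the second register always goes through the
// scaled slot (rule #1).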
538
539} // end anonymous namespace
540
541/// Recursion helper for initialMatch.
542static void DoInitialMatch(const SCEV *S, Loop *L,
543 SmallVectorImpl<const SCEV *> &Good,
544 SmallVectorImpl<const SCEV *> &Bad,
545 ScalarEvolution &SE) {
546 // Collect expressions which properly dominate the loop header.
547 if (SE.properlyDominates(S, L->getHeader())) {
548 Good.push_back(S);
549 return;
550 }
551
552 // Look at add operands.
553 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
554 for (const SCEV *S : Add->operands())
555 DoInitialMatch(S, L, Good, Bad, SE);
556 return;
557 }
558
559 // Look at addrec operands.
560 const SCEV *Start, *Step;
561 const Loop *ARLoop;
562 if (match(S,
563 m_scev_AffineAddRec(m_SCEV(Start), m_SCEV(Step), m_Loop(ARLoop))) &&
564 !Start->isZero()) {
565 DoInitialMatch(Start, L, Good, Bad, SE);
566 DoInitialMatch(SE.getAddRecExpr(SE.getConstant(S->getType(), 0), Step,
567 // FIXME: AR->getNoWrapFlags()
568 ARLoop, SCEV::FlagAnyWrap),
569 L, Good, Bad, SE);
570 return;
571 }
572
573 // Handle a multiplication by -1 (negation) if it didn't fold.
574 if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
575 if (Mul->getOperand(0)->isAllOnesValue()) {
576 SmallVector<const SCEV *, 4> Ops(drop_begin(Mul->operands()));
577 const SCEV *NewMul = SE.getMulExpr(Ops);
578
579 SmallVector<const SCEV *, 4> MyGood;
580 SmallVector<const SCEV *, 4> MyBad;
581 DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
582 const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
583 SE.getEffectiveSCEVType(NewMul->getType())));
584 for (const SCEV *S : MyGood)
585 Good.push_back(SE.getMulExpr(NegOne, S));
586 for (const SCEV *S : MyBad)
587 Bad.push_back(SE.getMulExpr(NegOne, S));
588 return;
589 }
590
591 // Ok, we can't do anything interesting. Just stuff the whole thing into a
592 // register and hope for the best.
593 Bad.push_back(S);
594}
595
596/// Incorporate loop-variant parts of S into this Formula, attempting to keep
597/// all loop-invariant and loop-computable values in a single base register.
598void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
599 SmallVector<const SCEV *, 4> Good;
600 SmallVector<const SCEV *, 4> Bad;
601 DoInitialMatch(S, L, Good, Bad, SE);
602 if (!Good.empty()) {
603 const SCEV *Sum = SE.getAddExpr(Good);
604 if (!Sum->isZero())
605 BaseRegs.push_back(Sum);
606 HasBaseReg = true;
607 }
608 if (!Bad.empty()) {
609 const SCEV *Sum = SE.getAddExpr(Bad);
610 if (!Sum->isZero())
611 BaseRegs.push_back(Sum);
612 HasBaseReg = true;
613 }
614 canonicalize(*L);
615}
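// Editorial illustration (not part of the original source): for
// S = {%a,+,4}<%L> with %a defined outside the loop, DoInitialMatch puts %a in
// Good (it properly dominates the header) and {0,+,4}<%L> in Bad, and after
// canonicalization the formula carries %a as a base register and the addrec
// {0,+,4}<%L> as the ScaledReg with Scale = 1.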
616
617static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L) {
618 return SCEVExprContains(S, [&L](const SCEV *S) {
619 return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
620 });
621}
622
623/// Check whether or not this formula satisfies the canonical
624/// representation.
625/// \see Formula::BaseRegs.
626bool Formula::isCanonical(const Loop &L) const {
627 assert((Scale == 0 || ScaledReg) &&
628 "ScaledReg must be non-null if Scale is non-zero");
629
630 if (!ScaledReg)
631 return BaseRegs.size() <= 1;
632
633 if (Scale != 1)
634 return true;
635
636 if (Scale == 1 && BaseRegs.empty())
637 return false;
638
639 if (containsAddRecDependentOnLoop(ScaledReg, L))
640 return true;
641
642 // If ScaledReg is not a recurrent expr, or it is one but its loop is not the
643 // current loop, while BaseRegs contains a reg holding a recurrent expr related
644 // to the current loop, we want to swap that reg in BaseRegs with ScaledReg.
645 return none_of(BaseRegs, [&L](const SCEV *S) {
646 return containsAddRecDependentOnLoop(S, L);
647 });
648}
649
650/// Helper method to morph a formula into its canonical representation.
651/// \see Formula::BaseRegs.
652/// Every formula having more than one base register, must use the ScaledReg
653/// field. Otherwise, we would have to do special cases everywhere in LSR
654/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
655/// On the other hand, 1*reg should be canonicalized into reg.
656void Formula::canonicalize(const Loop &L) {
657 if (isCanonical(L))
658 return;
659
660 if (BaseRegs.empty()) {
661 // No base reg? Use scale reg with scale = 1 as such.
662 assert(ScaledReg && "Expected 1*reg => reg");
663 assert(Scale == 1 && "Expected 1*reg => reg");
664 BaseRegs.push_back(ScaledReg);
665 Scale = 0;
666 ScaledReg = nullptr;
667 return;
668 }
669
670 // Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg.
671 if (!ScaledReg) {
672 ScaledReg = BaseRegs.pop_back_val();
673 Scale = 1;
674 }
675
676 // If ScaledReg is an invariant with respect to L, find the reg from
677 // BaseRegs containing the recurrent expr related with Loop L. Swap the
678 // reg with ScaledReg.
679 if (!containsAddRecDependentOnLoop(ScaledReg, L)) {
680 auto I = find_if(BaseRegs, [&L](const SCEV *S) {
681 return containsAddRecDependentOnLoop(S, L);
682 });
683 if (I != BaseRegs.end())
684 std::swap(ScaledReg, *I);
685 }
686 assert(isCanonical(L) && "Failed to canonicalize?");
687}
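// Editorial illustration (not part of the original source): a formula built
// with BaseRegs = { {0,+,1}<%L>, %inv } and no ScaledReg is canonicalized by
// popping %inv into ScaledReg with Scale = 1; since %inv is invariant in L
// while {0,+,1}<%L> is not, the two are then swapped so the addrec ends up in
// ScaledReg and %inv remains in BaseRegs.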
688
689/// Get rid of the scale in the formula.
690/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
691/// \return true if it was possible to get rid of the scale, false otherwise.
692/// \note After this operation the formula may not be in the canonical form.
693bool Formula::unscale() {
694 if (Scale != 1)
695 return false;
696 Scale = 0;
697 BaseRegs.push_back(ScaledReg);
698 ScaledReg = nullptr;
699 return true;
700}
701
702bool Formula::hasZeroEnd() const {
703 if (UnfoldedOffset || BaseOffset)
704 return false;
705 if (BaseRegs.size() != 1 || ScaledReg)
706 return false;
707 return true;
708}
709
710bool Formula::countsDownToZero() const {
711 if (!hasZeroEnd())
712 return false;
713 assert(BaseRegs.size() == 1 && "hasZeroEnd should mean one BaseReg");
714 const APInt *StepInt;
715 if (!match(BaseRegs[0], m_scev_AffineAddRec(m_SCEV(), m_scev_APInt(StepInt))))
716 return false;
717 return StepInt->isNegative();
718}
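// Editorial illustration (not part of the original source): a formula whose
// only register is {%n,+,-1}<%L>, with zero BaseOffset and UnfoldedOffset,
// satisfies hasZeroEnd() and, because the constant step is negative, also
// countsDownToZero().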
719
720/// Return the total number of register operands used by this formula. This does
721/// not include register uses implied by non-constant addrec strides.
722size_t Formula::getNumRegs() const {
723 return !!ScaledReg + BaseRegs.size();
724}
725
726/// Return the type of this formula, if it has one, or null otherwise. This type
727/// is meaningless except for the bit size.
728Type *Formula::getType() const {
729 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
730 ScaledReg ? ScaledReg->getType() :
731 BaseGV ? BaseGV->getType() :
732 nullptr;
733}
734
735/// Delete the given base reg from the BaseRegs list.
736void Formula::deleteBaseReg(const SCEV *&S) {
737 if (&S != &BaseRegs.back())
738 std::swap(S, BaseRegs.back());
739 BaseRegs.pop_back();
740}
741
742/// Test if this formula references the given register.
743bool Formula::referencesReg(const SCEV *S) const {
744 return S == ScaledReg || is_contained(BaseRegs, S);
745}
746
747/// Test whether this formula uses registers which are used by uses other than
748/// the use with the given index.
749bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
750 const RegUseTracker &RegUses) const {
751 if (ScaledReg)
752 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
753 return true;
754 for (const SCEV *BaseReg : BaseRegs)
755 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
756 return true;
757 return false;
758}
759
760#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
761void Formula::print(raw_ostream &OS) const {
762 bool First = true;
763 if (BaseGV) {
764 if (!First) OS << " + "; else First = false;
765 BaseGV->printAsOperand(OS, /*PrintType=*/false);
766 }
767 if (BaseOffset.isNonZero()) {
768 if (!First) OS << " + "; else First = false;
769 OS << BaseOffset;
770 }
771 for (const SCEV *BaseReg : BaseRegs) {
772 if (!First) OS << " + "; else First = false;
773 OS << "reg(" << *BaseReg << ')';
774 }
775 if (HasBaseReg && BaseRegs.empty()) {
776 if (!First) OS << " + "; else First = false;
777 OS << "**error: HasBaseReg**";
778 } else if (!HasBaseReg && !BaseRegs.empty()) {
779 if (!First) OS << " + "; else First = false;
780 OS << "**error: !HasBaseReg**";
781 }
782 if (Scale != 0) {
783 if (!First) OS << " + "; else First = false;
784 OS << Scale << "*reg(";
785 if (ScaledReg)
786 OS << *ScaledReg;
787 else
788 OS << "<unknown>";
789 OS << ')';
790 }
791 if (UnfoldedOffset.isNonZero()) {
792 if (!First) OS << " + ";
793 OS << "imm(" << UnfoldedOffset << ')';
794 }
795}
796
797LLVM_DUMP_METHOD void Formula::dump() const {
798 print(errs()); errs() << '\n';
799}
800#endif
801
802/// Return true if the given addrec can be sign-extended without changing its
803/// value.
804static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
805 Type *WideTy =
806 IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
807 return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
808}
809
810/// Return true if the given add can be sign-extended without changing its
811/// value.
812static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
813 Type *WideTy =
814 IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
815 return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
816}
817
818/// Return true if the given mul can be sign-extended without changing its
819/// value.
820static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
821 Type *WideTy =
822 IntegerType::get(SE.getContext(),
823 SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
824 return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
825}
826
827/// Return an expression for LHS /s RHS, if it can be determined and if the
828/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
829/// is true, expressions like (X * Y) /s Y are simplified to X, ignoring that
830/// the multiplication may overflow, which is useful when the result will be
831/// used in a context where the most significant bits are ignored.
832static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
833 ScalarEvolution &SE,
834 bool IgnoreSignificantBits = false) {
835 // Handle the trivial case, which works for any SCEV type.
836 if (LHS == RHS)
837 return SE.getConstant(LHS->getType(), 1);
838
839 // Handle a few RHS special cases.
840 const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
841 if (RC) {
842 const APInt &RA = RC->getAPInt();
843 // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
844 // some folding.
845 if (RA.isAllOnes()) {
846 if (LHS->getType()->isPointerTy())
847 return nullptr;
848 return SE.getMulExpr(LHS, RC);
849 }
850 // Handle x /s 1 as x.
851 if (RA == 1)
852 return LHS;
853 }
854
855 // Check for a division of a constant by a constant.
856 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
857 if (!RC)
858 return nullptr;
859 const APInt &LA = C->getAPInt();
860 const APInt &RA = RC->getAPInt();
861 if (LA.srem(RA) != 0)
862 return nullptr;
863 return SE.getConstant(LA.sdiv(RA));
864 }
865
866 // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
867 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
868 if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
869 const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
870 IgnoreSignificantBits);
871 if (!Step) return nullptr;
872 const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
873 IgnoreSignificantBits);
874 if (!Start) return nullptr;
875 // FlagNW is independent of the start value, step direction, and is
876 // preserved with smaller magnitude steps.
877 // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
878 return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
879 }
880 return nullptr;
881 }
882
883 // Distribute the sdiv over add operands, if the add doesn't overflow.
884 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
885 if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
886 SmallVector<const SCEV *, 8> Ops;
887 for (const SCEV *S : Add->operands()) {
888 const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits);
889 if (!Op) return nullptr;
890 Ops.push_back(Op);
891 }
892 return SE.getAddExpr(Ops);
893 }
894 return nullptr;
895 }
896
897 // Check for a multiply operand that we can pull RHS out of.
898 if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
899 if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
900 // Handle special case C1*X*Y /s C2*X*Y.
901 if (const SCEVMulExpr *MulRHS = dyn_cast<SCEVMulExpr>(RHS)) {
902 if (IgnoreSignificantBits || isMulSExtable(MulRHS, SE)) {
903 const SCEVConstant *LC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
904 const SCEVConstant *RC =
905 dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
906 if (LC && RC) {
907 SmallVector<const SCEV *, 4> LOps(drop_begin(Mul->operands()));
908 SmallVector<const SCEV *, 4> ROps(drop_begin(MulRHS->operands()));
909 if (LOps == ROps)
910 return getExactSDiv(LC, RC, SE, IgnoreSignificantBits);
911 }
912 }
913 }
914
915 SmallVector<const SCEV *, 4> Ops;
916 bool Found = false;
917 for (const SCEV *S : Mul->operands()) {
918 if (!Found)
919 if (const SCEV *Q = getExactSDiv(S, RHS, SE,
920 IgnoreSignificantBits)) {
921 S = Q;
922 Found = true;
923 }
924 Ops.push_back(S);
925 }
926 return Found ? SE.getMulExpr(Ops) : nullptr;
927 }
928 return nullptr;
929 }
930
931 // Otherwise we don't know.
932 return nullptr;
933}
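// Editorial illustration (not part of the original source), assuming the
// sign-extension checks pass (or IgnoreSignificantBits is set):
// ({0,+,6}<%L>) /s 3 yields {0,+,2}<%L> and (6 * %x) /s 3 yields (2 * %x),
// while (%x + 1) /s 2 yields null because the remainder is not known to be 0.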
934
935/// If S involves the addition of a constant integer value, return that integer
936/// value, and mutate S to point to a new SCEV with that value excluded.
937static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
938 const APInt *C;
939 if (match(S, m_scev_APInt(C))) {
940 if (C->getSignificantBits() <= 64) {
941 S = SE.getConstant(S->getType(), 0);
942 return Immediate::getFixed(C->getSExtValue());
943 }
944 } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
945 SmallVector<const SCEV *, 8> NewOps(Add->operands());
946 Immediate Result = ExtractImmediate(NewOps.front(), SE);
947 if (Result.isNonZero())
948 S = SE.getAddExpr(NewOps);
949 return Result;
950 } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
951 SmallVector<const SCEV *, 8> NewOps(AR->operands());
952 Immediate Result = ExtractImmediate(NewOps.front(), SE);
953 if (Result.isNonZero())
954 S = SE.getAddRecExpr(NewOps, AR->getLoop(),
955 // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
956 SCEV::FlagAnyWrap);
957 return Result;
958 } else if (EnableVScaleImmediates &&
959 match(S, m_scev_Mul(m_scev_APInt(C), m_SCEVVScale()))) {
960 S = SE.getConstant(S->getType(), 0);
961 return Immediate::getScalable(C->getSExtValue());
962 }
963 return Immediate::getZero();
964}
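// Editorial illustration (not part of the original source): for
// S = (4 + %base), ExtractImmediate returns Immediate::getFixed(4) and
// rewrites S to %base, so the 4 can live in an addressing-mode immediate
// field; when no constant addend can be extracted it returns
// Immediate::getZero() and leaves S untouched.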
965
966/// If S involves the addition of a GlobalValue address, return that symbol, and
967/// mutate S to point to a new SCEV with that value excluded.
968static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
969 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
970 if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
971 S = SE.getConstant(GV->getType(), 0);
972 return GV;
973 }
974 } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
975 SmallVector<const SCEV *, 8> NewOps(Add->operands());
976 GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
977 if (Result)
978 S = SE.getAddExpr(NewOps);
979 return Result;
980 } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
981 SmallVector<const SCEV *, 8> NewOps(AR->operands());
982 GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
983 if (Result)
984 S = SE.getAddRecExpr(NewOps, AR->getLoop(),
985 // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
986 SCEV::FlagAnyWrap);
987 return Result;
988 }
989 return nullptr;
990}
991
992/// Returns true if the specified instruction is using the specified value as an
993/// address.
994static bool isAddressUse(const TargetTransformInfo &TTI,
995 Instruction *Inst, Value *OperandVal) {
996 bool isAddress = isa<LoadInst>(Inst);
997 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
998 if (SI->getPointerOperand() == OperandVal)
999 isAddress = true;
1000 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
1001 // Addressing modes can also be folded into prefetches and a variety
1002 // of intrinsics.
1003 switch (II->getIntrinsicID()) {
1004 case Intrinsic::memset:
1005 case Intrinsic::prefetch:
1006 case Intrinsic::masked_load:
1007 if (II->getArgOperand(0) == OperandVal)
1008 isAddress = true;
1009 break;
1010 case Intrinsic::masked_store:
1011 if (II->getArgOperand(1) == OperandVal)
1012 isAddress = true;
1013 break;
1014 case Intrinsic::memmove:
1015 case Intrinsic::memcpy:
1016 if (II->getArgOperand(0) == OperandVal ||
1017 II->getArgOperand(1) == OperandVal)
1018 isAddress = true;
1019 break;
1020 default: {
1021 MemIntrinsicInfo IntrInfo;
1022 if (TTI.getTgtMemIntrinsic(II, IntrInfo)) {
1023 if (IntrInfo.PtrVal == OperandVal)
1024 isAddress = true;
1025 }
1026 }
1027 }
1028 } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
1029 if (RMW->getPointerOperand() == OperandVal)
1030 isAddress = true;
1031 } else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
1032 if (CmpX->getPointerOperand() == OperandVal)
1033 isAddress = true;
1034 }
1035 return isAddress;
1036}
1037
1038/// Return the type of the memory being accessed.
1039static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
1040 Instruction *Inst, Value *OperandVal) {
1041 MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());
1042
1043 // First get the type of memory being accessed.
1044 if (Type *Ty = Inst->getAccessType())
1045 AccessTy.MemTy = Ty;
1046
1047 // Then get the pointer address space.
1048 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
1049 AccessTy.AddrSpace = SI->getPointerAddressSpace();
1050 } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
1051 AccessTy.AddrSpace = LI->getPointerAddressSpace();
1052 } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
1053 AccessTy.AddrSpace = RMW->getPointerAddressSpace();
1054 } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
1055 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
1056 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
1057 switch (II->getIntrinsicID()) {
1058 case Intrinsic::prefetch:
1059 case Intrinsic::memset:
1060 AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
1061 AccessTy.MemTy = OperandVal->getType();
1062 break;
1063 case Intrinsic::memmove:
1064 case Intrinsic::memcpy:
1065 AccessTy.AddrSpace = OperandVal->getType()->getPointerAddressSpace();
1066 AccessTy.MemTy = OperandVal->getType();
1067 break;
1068 case Intrinsic::masked_load:
1069 AccessTy.AddrSpace =
1070 II->getArgOperand(0)->getType()->getPointerAddressSpace();
1071 break;
1072 case Intrinsic::masked_store:
1073 AccessTy.AddrSpace =
1074 II->getArgOperand(1)->getType()->getPointerAddressSpace();
1075 break;
1076 default: {
1077 MemIntrinsicInfo IntrInfo;
1078 if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) {
1079 AccessTy.AddrSpace
1080 = IntrInfo.PtrVal->getType()->getPointerAddressSpace();
1081 }
1082
1083 break;
1084 }
1085 }
1086 }
1087
1088 return AccessTy;
1089}
1090
1091/// Return true if this AddRec is already a phi in its loop.
1092static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
1093 for (PHINode &PN : AR->getLoop()->getHeader()->phis()) {
1094 if (SE.isSCEVable(PN.getType()) &&
1095 (SE.getEffectiveSCEVType(PN.getType()) ==
1096 SE.getEffectiveSCEVType(AR->getType())) &&
1097 SE.getSCEV(&PN) == AR)
1098 return true;
1099 }
1100 return false;
1101}
1102
1103/// Check if expanding this expression is likely to incur significant cost. This
1104/// is tricky because SCEV doesn't track which expressions are actually computed
1105/// by the current IR.
1106///
1107/// We currently allow expansion of IV increments that involve adds,
1108/// multiplication by constants, and AddRecs from existing phis.
1109///
1110/// TODO: Allow UDivExpr if we can find an existing IV increment that is an
1111/// obvious multiple of the UDivExpr.
1112static bool isHighCostExpansion(const SCEV *S,
1113 SmallPtrSetImpl<const SCEV *> &Processed,
1114 ScalarEvolution &SE) {
1115 // Zero/One operand expressions
1116 switch (S->getSCEVType()) {
1117 case scUnknown:
1118 case scConstant:
1119 case scVScale:
1120 return false;
1121 case scTruncate:
1122 return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
1123 Processed, SE);
1124 case scZeroExtend:
1125 return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
1126 Processed, SE);
1127 case scSignExtend:
1128 return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
1129 Processed, SE);
1130 default:
1131 break;
1132 }
1133
1134 if (!Processed.insert(S).second)
1135 return false;
1136
1137 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
1138 for (const SCEV *S : Add->operands()) {
1139 if (isHighCostExpansion(S, Processed, SE))
1140 return true;
1141 }
1142 return false;
1143 }
1144
1145 const SCEV *Op0, *Op1;
1146 if (match(S, m_scev_Mul(m_SCEV(Op0), m_SCEV(Op1)))) {
1147 // Multiplication by a constant is ok
1148 if (isa<SCEVConstant>(Op0))
1149 return isHighCostExpansion(Op1, Processed, SE);
1150
1151 // If we have the value of one operand, check if an existing
1152 // multiplication already generates this expression.
1153 if (const auto *U = dyn_cast<SCEVUnknown>(Op1)) {
1154 Value *UVal = U->getValue();
1155 for (User *UR : UVal->users()) {
1156 // If U is a constant, it may be used by a ConstantExpr.
1157 Instruction *UI = dyn_cast<Instruction>(UR);
1158 if (UI && UI->getOpcode() == Instruction::Mul &&
1159 SE.isSCEVable(UI->getType())) {
1160 return SE.getSCEV(UI) == S;
1161 }
1162 }
1163 }
1164 }
1165
1166 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
1167 if (isExistingPhi(AR, SE))
1168 return false;
1169 }
1170
1171 // For now, consider any other type of expression (div/mul/min/max) high cost.
1172 return true;
1173}
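// Editorial illustration (not part of the original source):
// (sext i32 {0,+,1}<%L> to i64) is considered cheap when the addrec is already
// a phi in the loop, and (4 * %x) is cheap because the multiplier is a
// constant, whereas a udiv, or a product of two unknowns with no existing mul
// instruction computing it, is treated as a high-cost expansion.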
1174
1175namespace {
1176
1177class LSRUse;
1178
1179} // end anonymous namespace
1180
1181/// Check if the addressing mode defined by \p F is completely
1182/// folded in \p LU at isel time.
1183/// This includes address-mode folding and special icmp tricks.
1184/// This function returns true if \p LU can accommodate what \p F
1185/// defines and up to 1 base + 1 scaled + offset.
1186/// In other words, if \p F has several base registers, this function may
1187/// still return true. Therefore, users still need to account for
1188/// additional base registers and/or unfolded offsets to derive an
1189/// accurate cost model.
1190static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1191 const LSRUse &LU, const Formula &F);
1192
1193// Get the cost of the scaling factor used in F for LU.
1194static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
1195 const LSRUse &LU, const Formula &F,
1196 const Loop &L);
1197
1198namespace {
1199
1200/// This class is used to measure and compare candidate formulae.
1201class Cost {
1202 const Loop *L = nullptr;
1203 ScalarEvolution *SE = nullptr;
1204 const TargetTransformInfo *TTI = nullptr;
1205 TargetTransformInfo::LSRCost C;
1206 TTI::AddressingModeKind AMK = TTI::AMK_None;
 1207
1208public:
1209 Cost() = delete;
1210 Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
1211 TTI::AddressingModeKind AMK) :
1212 L(L), SE(&SE), TTI(&TTI), AMK(AMK) {
1213 C.Insns = 0;
1214 C.NumRegs = 0;
1215 C.AddRecCost = 0;
1216 C.NumIVMuls = 0;
1217 C.NumBaseAdds = 0;
1218 C.ImmCost = 0;
1219 C.SetupCost = 0;
1220 C.ScaleCost = 0;
1221 }
1222
1223 bool isLess(const Cost &Other) const;
1224
1225 void Lose();
1226
1227#ifndef NDEBUG
1228 // Once any of the metrics loses, they must all remain losers.
1229 bool isValid() {
1230 return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
1231 | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
1232 || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
1233 & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);
1234 }
1235#endif
1236
1237 bool isLoser() {
1238 assert(isValid() && "invalid cost");
1239 return C.NumRegs == ~0u;
1240 }
1241
1242 void RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
1243 const DenseSet<const SCEV *> &VisitedRegs, const LSRUse &LU,
1244 bool HardwareLoopProfitable,
1245 SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);
1246
1247 void print(raw_ostream &OS) const;
1248 void dump() const;
1249
1250private:
1251 void RateRegister(const Formula &F, const SCEV *Reg,
1252 SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,
1253 bool HardwareLoopProfitable);
1254 void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
1255 SmallPtrSetImpl<const SCEV *> &Regs,
1256 const LSRUse &LU, bool HardwareLoopProfitable,
1257 SmallPtrSetImpl<const SCEV *> *LoserRegs);
1258};
1259
1260/// An operand value in an instruction which is to be replaced with some
1261/// equivalent, possibly strength-reduced, replacement.
1262struct LSRFixup {
1263 /// The instruction which will be updated.
1264 Instruction *UserInst = nullptr;
1265
1266 /// The operand of the instruction which will be replaced. The operand may be
1267 /// used more than once; every instance will be replaced.
1268 Value *OperandValToReplace = nullptr;
1269
1270 /// If this user is to use the post-incremented value of an induction
1271 /// variable, this set is non-empty and holds the loops associated with the
1272 /// induction variable.
1273 PostIncLoopSet PostIncLoops;
1274
1275 /// A constant offset to be added to the LSRUse expression. This allows
1276 /// multiple fixups to share the same LSRUse with different offsets, for
1277 /// example in an unrolled loop.
1278 Immediate Offset = Immediate::getZero();
1279
1280 LSRFixup() = default;
1281
1282 bool isUseFullyOutsideLoop(const Loop *L) const;
1283
1284 void print(raw_ostream &OS) const;
1285 void dump() const;
1286};
1287
1288/// This class holds the state that LSR keeps for each use in IVUsers, as well
1289/// as uses invented by LSR itself. It includes information about what kinds of
1290/// things can be folded into the user, information about the user itself, and
1291/// information about how the use may be satisfied. TODO: Represent multiple
1292/// users of the same expression in common?
1293class LSRUse {
1294 DenseSet<SmallVector<const SCEV *, 4>> Uniquifier;
 1295
1296public:
1297 /// An enum for a kind of use, indicating what types of scaled and immediate
1298 /// operands it might support.
1299 enum KindType {
1300 Basic, ///< A normal use, with no folding.
1301 Special, ///< A special case of basic, allowing -1 scales.
1302 Address, ///< An address use; folding according to TargetLowering
1303 ICmpZero ///< An equality icmp with both operands folded into one.
1304 // TODO: Add a generic icmp too?
1305 };
1306
1307 using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;
1308
1309 KindType Kind;
1310 MemAccessTy AccessTy;
1311
1312 /// The list of operands which are to be replaced.
1313 SmallVector<LSRFixup, 8> Fixups;
 1314
1315 /// Keep track of the min and max offsets of the fixups.
1316 Immediate MinOffset = Immediate::getFixedMax();
1317 Immediate MaxOffset = Immediate::getFixedMin();
1318
1319 /// This records whether all of the fixups using this LSRUse are outside of
1320 /// the loop, in which case some special-case heuristics may be used.
1321 bool AllFixupsOutsideLoop = true;
1322
1323 /// RigidFormula is set to true to guarantee that this use will be associated
1324 /// with a single formula--the one that initially matched. Some SCEV
1325 /// expressions cannot be expanded. This allows LSR to consider the registers
1326 /// used by those expressions without the need to expand them later after
1327 /// changing the formula.
1328 bool RigidFormula = false;
1329
1330 /// This records the widest use type for any fixup using this
1331 /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
1332 /// fixup widths to be equivalent, because the narrower one may be relying on
1333 /// the implicit truncation to truncate away bogus bits.
1334 Type *WidestFixupType = nullptr;
1335
1336 /// A list of ways to build a value that can satisfy this user. After the
1337 /// list is populated, one of these is selected heuristically and used to
1338 /// formulate a replacement for OperandValToReplace in UserInst.
1339 SmallVector<Formula, 12> Formulae;
1340
1341 /// The set of register candidates used by all formulae in this LSRUse.
1342 SmallPtrSet<const SCEV *, 4> Regs;
 1343
1344 LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}
1345
1346 LSRFixup &getNewFixup() {
1347 Fixups.push_back(LSRFixup());
1348 return Fixups.back();
1349 }
1350
1351 void pushFixup(LSRFixup &f) {
1352 Fixups.push_back(f);
1353 if (Immediate::isKnownGT(f.Offset, MaxOffset))
1354 MaxOffset = f.Offset;
1355 if (Immediate::isKnownLT(f.Offset, MinOffset))
1356 MinOffset = f.Offset;
1357 }
1358
1359 bool HasFormulaWithSameRegs(const Formula &F) const;
1360 float getNotSelectedProbability(const SCEV *Reg) const;
1361 bool InsertFormula(const Formula &F, const Loop &L);
1362 void DeleteFormula(Formula &F);
1363 void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
1364
1365 void print(raw_ostream &OS) const;
1366 void dump() const;
1367};
1368
1369} // end anonymous namespace
1370
1371static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1372 LSRUse::KindType Kind, MemAccessTy AccessTy,
1373 GlobalValue *BaseGV, Immediate BaseOffset,
1374 bool HasBaseReg, int64_t Scale,
1375 Instruction *Fixup = nullptr);
1376
1377static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) {
1378 if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
1379 return 1;
1380 if (Depth == 0)
1381 return 0;
1382 if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
1383 return getSetupCost(S->getStart(), Depth - 1);
1384 if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
1385 return getSetupCost(S->getOperand(), Depth - 1);
1386 if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
1387 return std::accumulate(S->operands().begin(), S->operands().end(), 0,
1388 [&](unsigned i, const SCEV *Reg) {
1389 return i + getSetupCost(Reg, Depth - 1);
1390 });
1391 if (auto S = dyn_cast<SCEVUDivExpr>(Reg))
1392 return getSetupCost(S->getLHS(), Depth - 1) +
1393 getSetupCost(S->getRHS(), Depth - 1);
1394 return 0;
1395}
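// Editorial illustration (not part of the original source): with enough
// remaining Depth, getSetupCost(%a + %b + 4) is 3 (one per SCEVUnknown or
// constant leaf), and for an addrec only the start value is charged, so
// {%a,+,1}<%L> costs 1; once Depth reaches 0 the remainder counts as free.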
1396
1397/// Tally up interesting quantities from the given register.
1398void Cost::RateRegister(const Formula &F, const SCEV *Reg,
1399 SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,
1400 bool HardwareLoopProfitable) {
1401 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
1402 // If this is an addrec for another loop, it should be an invariant
1403 // with respect to L since L is the innermost loop (at least
1404 // for now LSR only handles innermost loops).
1405 if (AR->getLoop() != L) {
1406 // If the AddRec exists, consider its register free and leave it alone.
1407 if (isExistingPhi(AR, *SE) && AMK != TTI::AMK_PostIndexed)
1408 return;
1409
1410 // It is bad to allow LSR for current loop to add induction variables
1411 // for its sibling loops.
1412 if (!AR->getLoop()->contains(L)) {
1413 Lose();
1414 return;
1415 }
1416
1417 // Otherwise, it will be an invariant with respect to Loop L.
1418 ++C.NumRegs;
1419 return;
1420 }
1421
1422 unsigned LoopCost = 1;
1423 if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
1424 TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
1425 const SCEV *Start;
1426 const SCEVConstant *Step;
1427 if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_SCEVConstant(Step))))
1428 // If the step size matches the base offset, we could use pre-indexed
1429 // addressing.
1430 if ((AMK == TTI::AMK_PreIndexed && F.BaseOffset.isFixed() &&
1431 Step->getAPInt() == F.BaseOffset.getFixedValue()) ||
1432 (AMK == TTI::AMK_PostIndexed && !isa<SCEVConstant>(Start) &&
1433 SE->isLoopInvariant(Start, L)))
1434 LoopCost = 0;
1435 }
1436 // If the loop counts down to zero and we'll be using a hardware loop then
1437 // the addrec will be combined into the hardware loop instruction.
1438 if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&
1439 HardwareLoopProfitable)
1440 LoopCost = 0;
1441 C.AddRecCost += LoopCost;
1442
1443 // Add the step value register, if it needs one.
1444 // TODO: The non-affine case isn't precisely modeled here.
1445 if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
1446 if (!Regs.count(AR->getOperand(1))) {
1447 RateRegister(F, AR->getOperand(1), Regs, LU, HardwareLoopProfitable);
1448 if (isLoser())
1449 return;
1450 }
1451 }
1452 }
1453 ++C.NumRegs;
1454
1455 // Rough heuristic; favor registers which don't require extra setup
1456 // instructions in the preheader.
1457 C.SetupCost += getSetupCost(Reg, SetupCostDepthLimit);
1458 // Ensure we don't, even with the recursion limit, produce invalid costs.
1459 C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);
1460
1461 C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
1462 SE->hasComputableLoopEvolution(Reg, L);
1463}
1464
1465/// Record this register in the set. If we haven't seen it before, rate
1466/// it. Optional LoserRegs provides a way to declare any formula that refers to
1467/// one of those regs an instant loser.
1468void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
1470 const LSRUse &LU, bool HardwareLoopProfitable,
1471 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1472 if (LoserRegs && LoserRegs->count(Reg)) {
1473 Lose();
1474 return;
1475 }
1476 if (Regs.insert(Reg).second) {
1477 RateRegister(F, Reg, Regs, LU, HardwareLoopProfitable);
1478 if (LoserRegs && isLoser())
1479 LoserRegs->insert(Reg);
1480 }
1481}
1482
1483void Cost::RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
1484 const DenseSet<const SCEV *> &VisitedRegs,
1485 const LSRUse &LU, bool HardwareLoopProfitable,
1486 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1487 if (isLoser())
1488 return;
1489 assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
1490 // Tally up the registers.
1491 unsigned PrevAddRecCost = C.AddRecCost;
1492 unsigned PrevNumRegs = C.NumRegs;
1493 unsigned PrevNumBaseAdds = C.NumBaseAdds;
1494 if (const SCEV *ScaledReg = F.ScaledReg) {
1495 if (VisitedRegs.count(ScaledReg)) {
1496 Lose();
1497 return;
1498 }
1499 RatePrimaryRegister(F, ScaledReg, Regs, LU, HardwareLoopProfitable,
1500 LoserRegs);
1501 if (isLoser())
1502 return;
1503 }
1504 for (const SCEV *BaseReg : F.BaseRegs) {
1505 if (VisitedRegs.count(BaseReg)) {
1506 Lose();
1507 return;
1508 }
1509 RatePrimaryRegister(F, BaseReg, Regs, LU, HardwareLoopProfitable,
1510 LoserRegs);
1511 if (isLoser())
1512 return;
1513 }
1514
1515 // Determine how many (unfolded) adds we'll need inside the loop.
1516 size_t NumBaseParts = F.getNumRegs();
1517 if (NumBaseParts > 1)
1518 // Do not count the base and a possible second register if the target
1519 // allows folding 2 registers.
1520 C.NumBaseAdds +=
1521 NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(*TTI, LU, F)));
1522 C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());
1523
1524 // Accumulate non-free scaling amounts.
1525 C.ScaleCost += getScalingFactorCost(*TTI, LU, F, *L).getValue();
1526
1527 // Tally up the non-zero immediates.
1528 for (const LSRFixup &Fixup : LU.Fixups) {
1529 if (Fixup.Offset.isCompatibleImmediate(F.BaseOffset)) {
1530 Immediate Offset = Fixup.Offset.addUnsigned(F.BaseOffset);
1531 if (F.BaseGV)
1532 C.ImmCost += 64; // Handle symbolic values conservatively.
1533 // TODO: This should probably be the pointer size.
1534 else if (Offset.isNonZero())
1535 C.ImmCost +=
1536 APInt(64, Offset.getKnownMinValue(), true).getSignificantBits();
1537
1538 // Check with target if this offset with this instruction is
1539 // specifically not supported.
1540 if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&
1541 !isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
1542 Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
1543 C.NumBaseAdds++;
1544 } else {
1545 // Incompatible immediate type; increase the cost to avoid using it.
1546 C.ImmCost += 2048;
1547 }
1548 }
1549
1550 // If we don't count instruction cost, exit here.
1551 if (!InsnsCost) {
1552 assert(isValid() && "invalid cost");
1553 return;
1554 }
1555
1556 // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
1557 // additional instruction (at least fill).
1558 // TODO: Need to distinguish register classes?
1559 unsigned TTIRegNum = TTI->getNumberOfRegisters(
1560 TTI->getRegisterClassForType(false, F.getType())) - 1;
1561 if (C.NumRegs > TTIRegNum) {
1562 // The cost already exceeded TTIRegNum, so only newly added registers can add
1563 // new instructions.
1564 if (PrevNumRegs > TTIRegNum)
1565 C.Insns += (C.NumRegs - PrevNumRegs);
1566 else
1567 C.Insns += (C.NumRegs - TTIRegNum);
1568 }
1569
1570 // If an ICmpZero formula does not end at 0, it cannot be replaced by
1571 // just an add or sub. We'll need to compare the final result of the AddRec.
1572 // That means we'll need an additional instruction. But if the target can
1573 // macro-fuse a compare with a branch, don't count this extra instruction.
1574 // For -10 + {0, +, 1}:
1575 // i = i + 1;
1576 // cmp i, 10
1577 //
1578 // For {-10, +, 1}:
1579 // i = i + 1;
1580 if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&
1581 !TTI->canMacroFuseCmp())
1582 C.Insns++;
1583 // Each new AddRec adds 1 instruction to calculation.
1584 C.Insns += (C.AddRecCost - PrevAddRecCost);
1585
1586 // BaseAdds adds instructions for unfolded registers.
1587 if (LU.Kind != LSRUse::ICmpZero)
1588 C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
1589 assert(isValid() && "invalid cost");
1590}
1591
1592/// Set this cost to a losing value.
1593void Cost::Lose() {
1594 C.Insns = std::numeric_limits<unsigned>::max();
1595 C.NumRegs = std::numeric_limits<unsigned>::max();
1596 C.AddRecCost = std::numeric_limits<unsigned>::max();
1597 C.NumIVMuls = std::numeric_limits<unsigned>::max();
1598 C.NumBaseAdds = std::numeric_limits<unsigned>::max();
1599 C.ImmCost = std::numeric_limits<unsigned>::max();
1600 C.SetupCost = std::numeric_limits<unsigned>::max();
1601 C.ScaleCost = std::numeric_limits<unsigned>::max();
1602}
1603
1604/// Choose the lower cost.
1605bool Cost::isLess(const Cost &Other) const {
1606 if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
1607 C.Insns != Other.C.Insns)
1608 return C.Insns < Other.C.Insns;
1609 return TTI->isLSRCostLess(C, Other.C);
1610}
1611
1612#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1613void Cost::print(raw_ostream &OS) const {
1614 if (InsnsCost)
1615 OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
1616 OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
1617 if (C.AddRecCost != 0)
1618 OS << ", with addrec cost " << C.AddRecCost;
1619 if (C.NumIVMuls != 0)
1620 OS << ", plus " << C.NumIVMuls << " IV mul"
1621 << (C.NumIVMuls == 1 ? "" : "s");
1622 if (C.NumBaseAdds != 0)
1623 OS << ", plus " << C.NumBaseAdds << " base add"
1624 << (C.NumBaseAdds == 1 ? "" : "s");
1625 if (C.ScaleCost != 0)
1626 OS << ", plus " << C.ScaleCost << " scale cost";
1627 if (C.ImmCost != 0)
1628 OS << ", plus " << C.ImmCost << " imm cost";
1629 if (C.SetupCost != 0)
1630 OS << ", plus " << C.SetupCost << " setup cost";
1631}
1632
1633LLVM_DUMP_METHOD void Cost::dump() const {
1634 print(errs()); errs() << '\n';
1635}
1636#endif
1637
1638/// Test whether this fixup always uses its value outside of the given loop.
1639bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
1640 // PHI nodes use their value in their incoming blocks.
1641 if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
1642 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1643 if (PN->getIncomingValue(i) == OperandValToReplace &&
1644 L->contains(PN->getIncomingBlock(i)))
1645 return false;
1646 return true;
1647 }
1648
1649 return !L->contains(UserInst);
1650}
1651
1652#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1653void LSRFixup::print(raw_ostream &OS) const {
1654 OS << "UserInst=";
1655 // Store is common and interesting enough to be worth special-casing.
1656 if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1657 OS << "store ";
1658 Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
1659 } else if (UserInst->getType()->isVoidTy())
1660 OS << UserInst->getOpcodeName();
1661 else
1662 UserInst->printAsOperand(OS, /*PrintType=*/false);
1663
1664 OS << ", OperandValToReplace=";
1665 OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);
1666
1667 for (const Loop *PIL : PostIncLoops) {
1668 OS << ", PostIncLoop=";
1669 PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
1670 }
1671
1672 if (Offset.isNonZero())
1673 OS << ", Offset=" << Offset;
1674}
1675
1676LLVM_DUMP_METHOD void LSRFixup::dump() const {
1677 print(errs()); errs() << '\n';
1678}
1679#endif
1680
1681/// Test whether this use has a formula with the same registers as the given
1682/// formula.
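/// For example (hypothetical formulae): base registers {A, B} with scaled
/// register C and base registers {B, C} with scaled register A both reduce to
/// the same sorted key {A, B, C}, so they count as using the same registers.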
1683bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
1684 SmallVector<const SCEV *, 4> Key = F.BaseRegs;
1685 if (F.ScaledReg) Key.push_back(F.ScaledReg);
1686 // Unstable sort by host order ok, because this is only used for uniquifying.
1687 llvm::sort(Key);
1688 return Uniquifier.count(Key);
1689}
1690
1691/// Return the probability of selecting a formula that does not reference Reg.
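/// For example, if 2 of 5 formulae reference Reg, the returned probability is
/// (5 - 2) / 5 = 0.6.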
1692float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
1693 unsigned FNum = 0;
1694 for (const Formula &F : Formulae)
1695 if (F.referencesReg(Reg))
1696 FNum++;
1697 return ((float)(Formulae.size() - FNum)) / Formulae.size();
1698}
1699
1700/// If the given formula has not yet been inserted, add it to the list, and
1701/// return true. Return false otherwise. The formula must be in canonical form.
1702bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
1703 assert(F.isCanonical(L) && "Invalid canonical representation");
1704
1705 if (!Formulae.empty() && RigidFormula)
1706 return false;
1707
1708 SmallVector<const SCEV *, 4> Key = F.BaseRegs;
1709 if (F.ScaledReg) Key.push_back(F.ScaledReg);
1710 // Unstable sort by host order ok, because this is only used for uniquifying.
1711 llvm::sort(Key);
1712
1713 if (!Uniquifier.insert(Key).second)
1714 return false;
1715
1716 // Using a register to hold the value of 0 is not profitable.
1717 assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
1718 "Zero allocated in a scaled register!");
1719#ifndef NDEBUG
1720 for (const SCEV *BaseReg : F.BaseRegs)
1721 assert(!BaseReg->isZero() && "Zero allocated in a base register!");
1722#endif
1723
1724 // Add the formula to the list.
1725 Formulae.push_back(F);
1726
1727 // Record registers now being used by this use.
1728 Regs.insert_range(F.BaseRegs);
1729 if (F.ScaledReg)
1730 Regs.insert(F.ScaledReg);
1731
1732 return true;
1733}
1734
1735/// Remove the given formula from this use's list.
1736void LSRUse::DeleteFormula(Formula &F) {
1737 if (&F != &Formulae.back())
1738 std::swap(F, Formulae.back());
1739 Formulae.pop_back();
1740}
1741
1742/// Recompute the Regs field, and update RegUses.
1743void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
1744 // Now that we've filtered out some formulae, recompute the Regs set.
1745 SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
1746 Regs.clear();
1747 for (const Formula &F : Formulae) {
1748 if (F.ScaledReg) Regs.insert(F.ScaledReg);
1749 Regs.insert_range(F.BaseRegs);
1750 }
1751
1752 // Update the RegTracker.
1753 for (const SCEV *S : OldRegs)
1754 if (!Regs.count(S))
1755 RegUses.dropRegister(S, LUIdx);
1756}
1757
1758#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1759void LSRUse::print(raw_ostream &OS) const {
1760 OS << "LSR Use: Kind=";
1761 switch (Kind) {
1762 case Basic: OS << "Basic"; break;
1763 case Special: OS << "Special"; break;
1764 case ICmpZero: OS << "ICmpZero"; break;
1765 case Address:
1766 OS << "Address of ";
1767 if (AccessTy.MemTy->isPointerTy())
1768 OS << "pointer"; // the full pointer type could be really verbose
1769 else {
1770 OS << *AccessTy.MemTy;
1771 }
1772
1773 OS << " in addrspace(" << AccessTy.AddrSpace << ')';
1774 }
1775
1776 OS << ", Offsets={";
1777 bool NeedComma = false;
1778 for (const LSRFixup &Fixup : Fixups) {
1779 if (NeedComma) OS << ',';
1780 OS << Fixup.Offset;
1781 NeedComma = true;
1782 }
1783 OS << '}';
1784
1785 if (AllFixupsOutsideLoop)
1786 OS << ", all-fixups-outside-loop";
1787
1788 if (WidestFixupType)
1789 OS << ", widest fixup type: " << *WidestFixupType;
1790}
1791
1792LLVM_DUMP_METHOD void LSRUse::dump() const {
1793 print(errs()); errs() << '\n';
1794}
1795#endif
1796
1797static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1798 LSRUse::KindType Kind, MemAccessTy AccessTy,
1799 GlobalValue *BaseGV, Immediate BaseOffset,
1800 bool HasBaseReg, int64_t Scale,
1801 Instruction *Fixup /* = nullptr */) {
1802 switch (Kind) {
1803 case LSRUse::Address: {
1804 int64_t FixedOffset =
1805 BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
1806 int64_t ScalableOffset =
1807 BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
1808 return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,
1809 HasBaseReg, Scale, AccessTy.AddrSpace,
1810 Fixup, ScalableOffset);
1811 }
1812 case LSRUse::ICmpZero:
1813 // There's not even a target hook for querying whether it would be legal to
1814 // fold a GV into an ICmp.
1815 if (BaseGV)
1816 return false;
1817
1818 // ICmp only has two operands; don't allow more than two non-trivial parts.
1819 if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
1820 return false;
1821
1822 // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
1823 // putting the scaled register in the other operand of the icmp.
1824 if (Scale != 0 && Scale != -1)
1825 return false;
1826
1827 // If we have low-level target information, ask the target if it can fold an
1828 // integer immediate on an icmp.
1829 if (BaseOffset.isNonZero()) {
1830 // We don't have an interface to query whether the target supports
1831 // icmpzero against scalable quantities yet.
1832 if (BaseOffset.isScalable())
1833 return false;
1834
1835 // We have one of:
1836 // ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
1837 // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
1838 // Offs is the ICmp immediate.
1839 if (Scale == 0)
1840 // The cast does the right thing with
1841 // std::numeric_limits<int64_t>::min().
1842 BaseOffset = BaseOffset.getFixed(-(uint64_t)BaseOffset.getFixedValue());
1843 return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());
1844 }
1845
1846 // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
1847 return true;
1848
1849 case LSRUse::Basic:
1850 // Only handle single-register values.
1851 return !BaseGV && Scale == 0 && BaseOffset.isZero();
1852
1853 case LSRUse::Special:
1854 // Special case Basic to handle -1 scales.
1855 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
1856 }
1857
1858 llvm_unreachable("Invalid LSRUse Kind!");
1859}
1860
1861static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1862 Immediate MinOffset, Immediate MaxOffset,
1863 LSRUse::KindType Kind, MemAccessTy AccessTy,
1864 GlobalValue *BaseGV, Immediate BaseOffset,
1865 bool HasBaseReg, int64_t Scale) {
1866 if (BaseOffset.isNonZero() &&
1867 (BaseOffset.isScalable() != MinOffset.isScalable() ||
1868 BaseOffset.isScalable() != MaxOffset.isScalable()))
1869 return false;
1870 // Check for overflow.
1871 int64_t Base = BaseOffset.getKnownMinValue();
1872 int64_t Min = MinOffset.getKnownMinValue();
1873 int64_t Max = MaxOffset.getKnownMinValue();
1874 if (((int64_t)((uint64_t)Base + Min) > Base) != (Min > 0))
1875 return false;
1876 MinOffset = Immediate::get((uint64_t)Base + Min, MinOffset.isScalable());
1877 if (((int64_t)((uint64_t)Base + Max) > Base) != (Max > 0))
1878 return false;
1879 MaxOffset = Immediate::get((uint64_t)Base + Max, MaxOffset.isScalable());
1880
1881 return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
1882 HasBaseReg, Scale) &&
1883 isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
1884 HasBaseReg, Scale);
1885}
1886
1887static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1888 Immediate MinOffset, Immediate MaxOffset,
1889 LSRUse::KindType Kind, MemAccessTy AccessTy,
1890 const Formula &F, const Loop &L) {
1891 // For the purpose of isAMCompletelyFolded either having a canonical formula
1892 // or a scale not equal to zero is correct.
1893 // Problems may arise from non-canonical formulae having a scale == 0.
1894 // Strictly speaking it would be best to just rely on canonical formulae.
1895 // However, when we generate the scaled formulae, we first check that the
1896 // scaling factor is profitable before computing the actual ScaledReg, for
1897 // the sake of compile time.
1898 assert((F.isCanonical(L) || F.Scale != 0));
1899 return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1900 F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
1901}
1902
1903/// Test whether we know how to expand the current formula.
1904static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,
1905 Immediate MaxOffset, LSRUse::KindType Kind,
1906 MemAccessTy AccessTy, GlobalValue *BaseGV,
1907 Immediate BaseOffset, bool HasBaseReg, int64_t Scale) {
1908 // We know how to expand completely foldable formulae.
1909 return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1910 BaseOffset, HasBaseReg, Scale) ||
1911 // Or formulae that use a base register produced by a sum of base
1912 // registers.
1913 (Scale == 1 &&
1914 isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1915 BaseGV, BaseOffset, true, 0));
1916}
1917
1918static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,
1919 Immediate MaxOffset, LSRUse::KindType Kind,
1920 MemAccessTy AccessTy, const Formula &F) {
1921 return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
1922 F.BaseOffset, F.HasBaseReg, F.Scale);
1923}
1924
1925static bool isLegalAddImmediate(const TargetTransformInfo &TTI,
1926 Immediate Offset) {
1927 if (Offset.isScalable())
1928 return TTI.isLegalAddScalableImmediate(Offset.getKnownMinValue());
1929
1930 return TTI.isLegalAddImmediate(Offset.getFixedValue());
1931}
1932
1933static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1934 const LSRUse &LU, const Formula &F) {
1935 // Target may want to look at the user instructions.
1936 if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
1937 for (const LSRFixup &Fixup : LU.Fixups)
1938 if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
1939 (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
1940 F.Scale, Fixup.UserInst))
1941 return false;
1942 return true;
1943 }
1944
1945 return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
1946 LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
1947 F.Scale);
1948}
1949
1950static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
1951 const LSRUse &LU, const Formula &F,
1952 const Loop &L) {
1953 if (!F.Scale)
1954 return 0;
1955
1956 // If the use is not completely folded in that instruction, we will have to
1957 // pay an extra cost only for scale != 1.
1958 if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
1959 LU.AccessTy, F, L))
1960 return F.Scale != 1;
1961
1962 switch (LU.Kind) {
1963 case LSRUse::Address: {
1964 // Check the scaling factor cost with both the min and max offsets.
1965 int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
1966 if (F.BaseOffset.isScalable()) {
1967 ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue();
1968 ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue();
1969 } else {
1970 FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue();
1971 FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue();
1972 }
1973 InstructionCost ScaleCostMinOffset = TTI.getScalingFactorCost(
1974 LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMin, ScalableMin),
1975 F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
1976 InstructionCost ScaleCostMaxOffset = TTI.getScalingFactorCost(
1977 LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMax, ScalableMax),
1978 F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
1979
1980 assert(ScaleCostMinOffset.isValid() && ScaleCostMaxOffset.isValid() &&
1981 "Legal addressing mode has an illegal cost!");
1982 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1983 }
1984 case LSRUse::ICmpZero:
1985 case LSRUse::Basic:
1986 case LSRUse::Special:
1987 // The use is completely folded, i.e., everything is folded into the
1988 // instruction.
1989 return 0;
1990 }
1991
1992 llvm_unreachable("Invalid LSRUse Kind!");
1993}
1994
1995static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1996 LSRUse::KindType Kind, MemAccessTy AccessTy,
1997 GlobalValue *BaseGV, Immediate BaseOffset,
1998 bool HasBaseReg) {
1999 // Fast-path: zero is always foldable.
2000 if (BaseOffset.isZero() && !BaseGV)
2001 return true;
2002
2003 // Conservatively, create an address with an immediate and a
2004 // base and a scale.
2005 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2006
2007 // Canonicalize a scale of 1 to a base register if the formula doesn't
2008 // already have a base register.
2009 if (!HasBaseReg && Scale == 1) {
2010 Scale = 0;
2011 HasBaseReg = true;
2012 }
2013
2014 // FIXME: Try with + without a scale? Maybe based on TTI?
2015 // I think basereg + scaledreg + immediateoffset isn't a good 'conservative'
2016 // default for many architectures, not just AArch64 SVE. More investigation
2017 // needed later to determine if this should be used more widely than just
2018 // on scalable types.
2019 if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
2020 AccessTy.MemTy && AccessTy.MemTy->isScalableTy() && DropScaledForVScale)
2021 Scale = 0;
2022
2023 return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
2024 HasBaseReg, Scale);
2025}
2026
2027static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
2028 ScalarEvolution &SE, Immediate MinOffset,
2029 Immediate MaxOffset, LSRUse::KindType Kind,
2030 MemAccessTy AccessTy, const SCEV *S,
2031 bool HasBaseReg) {
2032 // Fast-path: zero is always foldable.
2033 if (S->isZero()) return true;
2034
2035 // Conservatively, create an address with an immediate and a
2036 // base and a scale.
2037 Immediate BaseOffset = ExtractImmediate(S, SE);
2038 GlobalValue *BaseGV = ExtractSymbol(S, SE);
2039
2040 // If there's anything else involved, it's not foldable.
2041 if (!S->isZero()) return false;
2042
2043 // Fast-path: zero is always foldable.
2044 if (BaseOffset.isZero() && !BaseGV)
2045 return true;
2046
2047 if (BaseOffset.isScalable())
2048 return false;
2049
2050 // Conservatively, create an address with an immediate and a
2051 // base and a scale.
2052 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2053
2054 return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
2055 BaseOffset, HasBaseReg, Scale);
2056}
2057
2058namespace {
2059
2060/// An individual increment in a Chain of IV increments. Relate an IV user to
2061/// an expression that computes the IV it uses from the IV used by the previous
2062/// link in the Chain.
2063///
2064/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
2065/// original IVOperand. The head of the chain's IVOperand is only valid during
2066/// chain collection, before LSR replaces IV users. During chain generation,
2067/// IncExpr can be used to find the new IVOperand that computes the same
2068/// expression.
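/// An illustrative (hypothetical) link: if the previous link's IVOperand was
/// %p0 and this user is "%v = load i8, ptr %p1" with
/// %p1 = getelementptr i8, ptr %p0, i64 4, then UserInst is the load,
/// IVOperand is %p1, and IncExpr is the SCEV constant 4, i.e.
/// SCEV(%p1) - SCEV(%p0).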
2069struct IVInc {
2070 Instruction *UserInst;
2071 Value* IVOperand;
2072 const SCEV *IncExpr;
2073
2074 IVInc(Instruction *U, Value *O, const SCEV *E)
2075 : UserInst(U), IVOperand(O), IncExpr(E) {}
2076};
2077
2078// The list of IV increments in program order. We typically add the head of a
2079// chain without finding subsequent links.
2080struct IVChain {
2081 SmallVector<IVInc, 1> Incs;
2082 const SCEV *ExprBase = nullptr;
2083
2084 IVChain() = default;
2085 IVChain(const IVInc &Head, const SCEV *Base)
2086 : Incs(1, Head), ExprBase(Base) {}
2087
2088 using const_iterator = SmallVectorImpl<IVInc>::const_iterator;
2089
2090 // Return the first increment in the chain.
2091 const_iterator begin() const {
2092 assert(!Incs.empty());
2093 return std::next(Incs.begin());
2094 }
2095 const_iterator end() const {
2096 return Incs.end();
2097 }
2098
2099 // Returns true if this chain contains any increments.
2100 bool hasIncs() const { return Incs.size() >= 2; }
2101
2102 // Add an IVInc to the end of this chain.
2103 void add(const IVInc &X) { Incs.push_back(X); }
2104
2105 // Returns the last UserInst in the chain.
2106 Instruction *tailUserInst() const { return Incs.back().UserInst; }
2107
2108 // Returns true if IncExpr can be profitably added to this chain.
2109 bool isProfitableIncrement(const SCEV *OperExpr,
2110 const SCEV *IncExpr,
2111 ScalarEvolution &SE);
2112};
2113
2114/// Helper for CollectChains to track multiple IV increment uses. Distinguish
2115/// between FarUsers that definitely cross IV increments and NearUsers that may
2116/// be used between IV increments.
2117struct ChainUsers {
2118 SmallPtrSet<Instruction*, 4> FarUsers;
2119 SmallPtrSet<Instruction*, 4> NearUsers;
2120};
2121
2122/// This class holds state for the main loop strength reduction logic.
2123class LSRInstance {
2124 IVUsers &IU;
2125 ScalarEvolution &SE;
2126 DominatorTree &DT;
2127 LoopInfo &LI;
2128 AssumptionCache &AC;
2129 TargetLibraryInfo &TLI;
2130 const TargetTransformInfo &TTI;
2131 Loop *const L;
2132 MemorySSAUpdater *MSSAU;
2134 mutable SCEVExpander Rewriter;
2135 bool Changed = false;
2136 bool HardwareLoopProfitable = false;
2137
2138 /// This is the insert position at which the current loop's induction variable
2139 /// increment should be placed. In simple loops, this is the latch block's
2140 /// terminator. But in more complicated cases, this is a position which will
2141 /// dominate all the in-loop post-increment users.
2142 Instruction *IVIncInsertPos = nullptr;
2143
2144 /// Interesting factors between use strides.
2145 ///
2146 /// We explicitly use a SetVector which contains a SmallSet, instead of the
2147 /// default, a SmallDenseSet, because we need to use the full range of
2148 /// int64_ts, and there's currently no good way of doing that with
2149 /// SmallDenseSet.
2150 SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;
2151
2152 /// The cost of the current SCEV; the best solution found by LSR will be
2153 /// dropped if that solution is not profitable.
2154 Cost BaselineCost;
2155
2156 /// Interesting use types, to facilitate truncation reuse.
2157 SmallSetVector<Type *, 4> Types;
2158
2159 /// The list of interesting uses.
2160 SmallVector<LSRUse, 16> Uses;
2161
2162 /// Track which uses use which register candidates.
2163 RegUseTracker RegUses;
2164
2165 // Limit the number of chains to avoid quadratic behavior. We don't expect to
2166 // have more than a few IV increment chains in a loop. Missing a Chain falls
2167 // back to normal LSR behavior for those uses.
2168 static const unsigned MaxChains = 8;
2169
2170 /// IV users can form a chain of IV increments.
2171 SmallVector<IVChain, 8> IVChainVec;
2172
2173 /// IV users that belong to profitable IVChains.
2174 SmallPtrSet<Use*, MaxChains> IVIncSet;
2175
2176 /// Induction variables that were generated and inserted by the SCEV Expander.
2177 SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;
2178
2179 // Inserting instructions in the loop and using them as PHI's input could
2180 // break LCSSA if the PHI's parent block is not a loop exit (i.e. the
2181 // corresponding incoming block is not loop exiting). So collect all such
2182 // instructions to form LCSSA for them later.
2183 SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;
2184
2185 void OptimizeShadowIV();
2186 bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
2187 ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
2188 void OptimizeLoopTermCond();
2189
2190 void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
2191 SmallVectorImpl<ChainUsers> &ChainUsersVec);
2192 void FinalizeChain(IVChain &Chain);
2193 void CollectChains();
2194 void GenerateIVChain(const IVChain &Chain,
2195 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2196
2197 void CollectInterestingTypesAndFactors();
2198 void CollectFixupsAndInitialFormulae();
2199
2200 // Support for sharing of LSRUses between LSRFixups.
2201 using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
2202 UseMapTy UseMap;
2203
2204 bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg,
2205 LSRUse::KindType Kind, MemAccessTy AccessTy);
2206
2207 std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
2208 MemAccessTy AccessTy);
2209
2210 void DeleteUse(LSRUse &LU, size_t LUIdx);
2211
2212 LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
2213
2214 void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
2215 void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
2216 void CountRegisters(const Formula &F, size_t LUIdx);
2217 bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
2218
2219 void CollectLoopInvariantFixupsAndFormulae();
2220
2221 void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
2222 unsigned Depth = 0);
2223
2224 void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
2225 const Formula &Base, unsigned Depth,
2226 size_t Idx, bool IsScaledReg = false);
2227 void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
2228 void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
2229 const Formula &Base, size_t Idx,
2230 bool IsScaledReg = false);
2231 void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
2232 void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
2233 const Formula &Base,
2234 const SmallVectorImpl<Immediate> &Worklist,
2235 size_t Idx, bool IsScaledReg = false);
2236 void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
2237 void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
2238 void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
2239 void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
2240 void GenerateCrossUseConstantOffsets();
2241 void GenerateAllReuseFormulae();
2242
2243 void FilterOutUndesirableDedicatedRegisters();
2244
2245 size_t EstimateSearchSpaceComplexity() const;
2246 void NarrowSearchSpaceByDetectingSupersets();
2247 void NarrowSearchSpaceByCollapsingUnrolledCode();
2248 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2249 void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2250 void NarrowSearchSpaceByFilterPostInc();
2251 void NarrowSearchSpaceByDeletingCostlyFormulas();
2252 void NarrowSearchSpaceByPickingWinnerRegs();
2253 void NarrowSearchSpaceUsingHeuristics();
2254
2255 void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
2256 Cost &SolutionCost,
2257 SmallVectorImpl<const Formula *> &Workspace,
2258 const Cost &CurCost,
2259 const SmallPtrSet<const SCEV *, 16> &CurRegs,
2260 DenseSet<const SCEV *> &VisitedRegs) const;
2261 void Solve(SmallVectorImpl<const Formula *> &Solution) const;
2262
2263 BasicBlock::iterator
2264 HoistInsertPosition(BasicBlock::iterator IP,
2265 const SmallVectorImpl<Instruction *> &Inputs) const;
2266 BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
2267 const LSRFixup &LF,
2268 const LSRUse &LU) const;
2269
2270 Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
2272 SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
2273 void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
2274 const Formula &F,
2276 void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
2278 void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
2279
2280public:
2281 LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
2282 LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC,
2283 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);
2284
2285 bool getChanged() const { return Changed; }
2286 const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const {
2287 return ScalarEvolutionIVs;
2288 }
2289
2290 void print_factors_and_types(raw_ostream &OS) const;
2291 void print_fixups(raw_ostream &OS) const;
2292 void print_uses(raw_ostream &OS) const;
2293 void print(raw_ostream &OS) const;
2294 void dump() const;
2295};
2296
2297} // end anonymous namespace
2298
2299/// If the IV is used in an int-to-float cast inside the loop then try to eliminate
2300/// the cast operation.
2301void LSRInstance::OptimizeShadowIV() {
2302 const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
2303 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2304 return;
2305
2306 for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
2307 UI != E; /* empty */) {
2308 IVUsers::const_iterator CandidateUI = UI;
2309 ++UI;
2310 Instruction *ShadowUse = CandidateUI->getUser();
2311 Type *DestTy = nullptr;
2312 bool IsSigned = false;
2313
2314 /* If shadow use is a int->float cast then insert a second IV
2315 to eliminate this cast.
2316
2317 for (unsigned i = 0; i < n; ++i)
2318 foo((double)i);
2319
2320 is transformed into
2321
2322 double d = 0.0;
2323 for (unsigned i = 0; i < n; ++i, ++d)
2324 foo(d);
2325 */
2326 if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
2327 IsSigned = false;
2328 DestTy = UCast->getDestTy();
2329 }
2330 else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
2331 IsSigned = true;
2332 DestTy = SCast->getDestTy();
2333 }
2334 if (!DestTy) continue;
2335
2336 // If target does not support DestTy natively then do not apply
2337 // this transformation.
2338 if (!TTI.isTypeLegal(DestTy)) continue;
2339
2340 PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
2341 if (!PH) continue;
2342 if (PH->getNumIncomingValues() != 2) continue;
2343
2344 // If the calculation in integers overflows, the result in the FP type will
2345 // differ. So we can only do this transformation if we are guaranteed not to
2346 // deal with overflowing values.
2347 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PH));
2348 if (!AR) continue;
2349 if (IsSigned && !AR->hasNoSignedWrap()) continue;
2350 if (!IsSigned && !AR->hasNoUnsignedWrap()) continue;
2351
2352 Type *SrcTy = PH->getType();
2353 int Mantissa = DestTy->getFPMantissaWidth();
2354 if (Mantissa == -1) continue;
2355 if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
2356 continue;
2357
2358 unsigned Entry, Latch;
2359 if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
2360 Entry = 0;
2361 Latch = 1;
2362 } else {
2363 Entry = 1;
2364 Latch = 0;
2365 }
2366
2367 ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
2368 if (!Init) continue;
2369 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
2370 (double)Init->getSExtValue() :
2371 (double)Init->getZExtValue());
2372
2373 BinaryOperator *Incr =
2374 dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
2375 if (!Incr) continue;
2376 if (Incr->getOpcode() != Instruction::Add
2377 && Incr->getOpcode() != Instruction::Sub)
2378 continue;
2379
2380 /* Initialize new IV, double d = 0.0 in above example. */
2381 ConstantInt *C = nullptr;
2382 if (Incr->getOperand(0) == PH)
2383 C = dyn_cast<ConstantInt>(Incr->getOperand(1));
2384 else if (Incr->getOperand(1) == PH)
2385 C = dyn_cast<ConstantInt>(Incr->getOperand(0));
2386 else
2387 continue;
2388
2389 if (!C) continue;
2390
2391 // Ignore negative constants, as the code below doesn't handle them
2392 // correctly. TODO: Remove this restriction.
2393 if (!C->getValue().isStrictlyPositive())
2394 continue;
2395
2396 /* Add new PHINode. */
2397 PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH->getIterator());
2398 NewPH->setDebugLoc(PH->getDebugLoc());
2399
2400 /* create new increment. '++d' in above example. */
2401 Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
2402 BinaryOperator *NewIncr = BinaryOperator::Create(
2403 Incr->getOpcode() == Instruction::Add ? Instruction::FAdd
2404 : Instruction::FSub,
2405 NewPH, CFP, "IV.S.next.", Incr->getIterator());
2406 NewIncr->setDebugLoc(Incr->getDebugLoc());
2407
2408 NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
2409 NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
2410
2411 /* Remove cast operation */
2412 ShadowUse->replaceAllUsesWith(NewPH);
2413 ShadowUse->eraseFromParent();
2414 Changed = true;
2415 break;
2416 }
2417}
2418
2419/// If Cond has an operand that is an expression of an IV, set the IV user and
2420/// stride information and return true, otherwise return false.
2421bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
2422 for (IVStrideUse &U : IU)
2423 if (U.getUser() == Cond) {
2424 // NOTE: we could handle setcc instructions with multiple uses here, but
2425 // InstCombine does it as well for simple uses, it's not clear that it
2426 // occurs enough in real life to handle.
2427 CondUse = &U;
2428 return true;
2429 }
2430 return false;
2431}
2432
2433/// Rewrite the loop's terminating condition if it uses a max computation.
2434///
2435/// This is a narrow solution to a specific, but acute, problem. For loops
2436/// like this:
2437///
2438/// i = 0;
2439/// do {
2440/// p[i] = 0.0;
2441/// } while (++i < n);
2442///
2443/// the trip count isn't just 'n', because 'n' might not be positive. And
2444/// unfortunately this can come up even for loops where the user didn't use
2445/// a C do-while loop. For example, seemingly well-behaved top-test loops
2446/// will commonly be lowered like this:
2447///
2448/// if (n > 0) {
2449/// i = 0;
2450/// do {
2451/// p[i] = 0.0;
2452/// } while (++i < n);
2453/// }
2454///
2455/// and then it's possible for subsequent optimization to obscure the if
2456/// test in such a way that indvars can't find it.
2457///
2458/// When indvars can't find the if test in loops like this, it creates a
2459/// max expression, which allows it to give the loop a canonical
2460/// induction variable:
2461///
2462/// i = 0;
2463/// max = n < 1 ? 1 : n;
2464/// do {
2465/// p[i] = 0.0;
2466/// } while (++i != max);
2467///
2468/// Canonical induction variables are necessary because the loop passes
2469/// are designed around them. The most obvious example of this is the
2470/// LoopInfo analysis, which doesn't remember trip count values. It
2471/// expects to be able to rediscover the trip count each time it is
2472/// needed, and it does this using a simple analysis that only succeeds if
2473/// the loop has a canonical induction variable.
2474///
2475/// However, when it comes time to generate code, the maximum operation
2476/// can be quite costly, especially if it's inside of an outer loop.
2477///
2478/// This function solves this problem by detecting this type of loop and
2479/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
2480/// the instructions for the maximum computation.
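/// For the example above, the rewritten exit test is conceptually
///   } while (++i < n);
/// that is, the ICMP_NE against max becomes an ICMP_SLT against n, and the
/// now-dead max computation is deleted.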
2481ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
2482 // Check that the loop matches the pattern we're looking for.
2483 if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
2484 Cond->getPredicate() != CmpInst::ICMP_NE)
2485 return Cond;
2486
2487 SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
2488 if (!Sel || !Sel->hasOneUse()) return Cond;
2489
2490 const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
2491 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2492 return Cond;
2493 const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
2494
2495 // Add one to the backedge-taken count to get the trip count.
2496 const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
2497 if (IterationCount != SE.getSCEV(Sel)) return Cond;
2498
2499 // Check for a max calculation that matches the pattern. There's no check
2500 // for ICMP_ULE here because the comparison would be with zero, which
2501 // isn't interesting.
2502 CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
2503 const SCEVNAryExpr *Max = nullptr;
2504 if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
2505 Pred = ICmpInst::ICMP_SLE;
2506 Max = S;
2507 } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
2508 Pred = ICmpInst::ICMP_SLT;
2509 Max = S;
2510 } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
2511 Pred = ICmpInst::ICMP_ULT;
2512 Max = U;
2513 } else {
2514 // No match; bail.
2515 return Cond;
2516 }
2517
2518 // To handle a max with more than two operands, this optimization would
2519 // require additional checking and setup.
2520 if (Max->getNumOperands() != 2)
2521 return Cond;
2522
2523 const SCEV *MaxLHS = Max->getOperand(0);
2524 const SCEV *MaxRHS = Max->getOperand(1);
2525
2526 // ScalarEvolution canonicalizes constants to the left. For < and >, look
2527 // for a comparison with 1. For <= and >=, a comparison with zero.
2528 if (!MaxLHS ||
2529 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
2530 return Cond;
2531
2532 // Check the relevant induction variable for conformance to
2533 // the pattern.
2534 const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
2535 if (!match(IV,
2537 return Cond;
2538
2539 assert(cast<SCEVAddRecExpr>(IV)->getLoop() == L &&
2540 "Loop condition operand is an addrec in a different loop!");
2541
2542 // Check the right operand of the select, and remember it, as it will
2543 // be used in the new comparison instruction.
2544 Value *NewRHS = nullptr;
2545 if (ICmpInst::isTrueWhenEqual(Pred)) {
2546 // Look for n+1, and grab n.
2547 if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
2548 if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2549 if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
2550 NewRHS = BO->getOperand(0);
2551 if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
2552 if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2553 if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
2554 NewRHS = BO->getOperand(0);
2555 if (!NewRHS)
2556 return Cond;
2557 } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
2558 NewRHS = Sel->getOperand(1);
2559 else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
2560 NewRHS = Sel->getOperand(2);
2561 else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
2562 NewRHS = SU->getValue();
2563 else
2564 // Max doesn't match expected pattern.
2565 return Cond;
2566
2567 // Determine the new comparison opcode. It may be signed or unsigned,
2568 // and the original comparison may be either equality or inequality.
2569 if (Cond->getPredicate() == CmpInst::ICMP_EQ)
2570 Pred = CmpInst::getInversePredicate(Pred);
2571
2572 // Ok, everything looks ok to change the condition into an SLT or SGE and
2573 // delete the max calculation.
2574 ICmpInst *NewCond = new ICmpInst(Cond->getIterator(), Pred,
2575 Cond->getOperand(0), NewRHS, "scmp");
2576
2577 // Delete the max calculation instructions.
2578 NewCond->setDebugLoc(Cond->getDebugLoc());
2579 Cond->replaceAllUsesWith(NewCond);
2580 CondUse->setUser(NewCond);
2581 Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
2582 Cond->eraseFromParent();
2583 Sel->eraseFromParent();
2584 if (Cmp->use_empty()) {
2585 salvageDebugInfo(*Cmp);
2586 Cmp->eraseFromParent();
2587 }
2588 return NewCond;
2589}
2590
2591/// Change loop terminating condition to use the postinc iv when possible.
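/// For example (illustrative IR): with %i.next = add i64 %i, 1, an exit test
///   %c = icmp ne i64 %i, %end
/// can be expressed on the post-increment value as
///   %c = icmp ne i64 %i.next, %end.plus.1
/// (where %end.plus.1 is %end + 1); the constant adjustment folds into the
/// comparison operand, so %i itself need not stay live across the latch.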
2592void
2593LSRInstance::OptimizeLoopTermCond() {
2594 SmallPtrSet<Instruction *, 4> PostIncs;
2595
2596 // We need a different set of heuristics for rotated and non-rotated loops.
2597 // If a loop is rotated then the latch is also the backedge, so inserting
2598 // post-inc expressions just before the latch is ideal. To reduce live ranges
2599 // it also makes sense to rewrite terminating conditions to use post-inc
2600 // expressions.
2601 //
2602 // If the loop is not rotated then the latch is not a backedge; the latch
2603 // check is done in the loop head. Adding post-inc expressions before the
2604 // latch will cause overlapping live-ranges of pre-inc and post-inc expressions
2605 // in the loop body. In this case we do *not* want to use post-inc expressions
2606 // in the latch check, and we want to insert post-inc expressions before
2607 // the backedge.
2608 BasicBlock *LatchBlock = L->getLoopLatch();
2609 SmallVector<BasicBlock*, 8> ExitingBlocks;
2610 L->getExitingBlocks(ExitingBlocks);
2611 if (!llvm::is_contained(ExitingBlocks, LatchBlock)) {
2612 // The backedge doesn't exit the loop; treat this as a head-tested loop.
2613 IVIncInsertPos = LatchBlock->getTerminator();
2614 return;
2615 }
2616
2617 // Otherwise treat this as a rotated loop.
2618 for (BasicBlock *ExitingBlock : ExitingBlocks) {
2619 // Get the terminating condition for the loop if possible. If we
2620 // can, we want to change it to use a post-incremented version of its
2621 // induction variable, to allow coalescing the live ranges for the IV into
2622 // one register value.
2623
2624 BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2625 if (!TermBr)
2626 continue;
2627 // FIXME: Overly conservative, termination condition could be an 'or' etc..
2628 if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
2629 continue;
2630
2631 // Search IVUsesByStride to find Cond's IVUse if there is one.
2632 IVStrideUse *CondUse = nullptr;
2633 ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
2634 if (!FindIVUserForCond(Cond, CondUse))
2635 continue;
2636
2637 // If the trip count is computed in terms of a max (due to ScalarEvolution
2638 // being unable to find a sufficient guard, for example), change the loop
2639 // comparison to use SLT or ULT instead of NE.
2640 // One consequence of doing this now is that it disrupts the count-down
2641 // optimization. That's not always a bad thing though, because in such
2642 // cases it may still be worthwhile to avoid a max.
2643 Cond = OptimizeMax(Cond, CondUse);
2644
2645 // If this exiting block dominates the latch block, it may also use
2646 // the post-inc value if it won't be shared with other uses.
2647 // Check for dominance.
2648 if (!DT.dominates(ExitingBlock, LatchBlock))
2649 continue;
2650
2651 // Conservatively avoid trying to use the post-inc value in non-latch
2652 // exits if there may be pre-inc users in intervening blocks.
2653 if (LatchBlock != ExitingBlock)
2654 for (const IVStrideUse &UI : IU)
2655 // Test if the use is reachable from the exiting block. This dominator
2656 // query is a conservative approximation of reachability.
2657 if (&UI != CondUse &&
2658 !DT.properlyDominates(UI.getUser()->getParent(), ExitingBlock)) {
2659 // Conservatively assume there may be reuse if the quotient of their
2660 // strides could be a legal scale.
2661 const SCEV *A = IU.getStride(*CondUse, L);
2662 const SCEV *B = IU.getStride(UI, L);
2663 if (!A || !B) continue;
2664 if (SE.getTypeSizeInBits(A->getType()) !=
2665 SE.getTypeSizeInBits(B->getType())) {
2666 if (SE.getTypeSizeInBits(A->getType()) >
2667 SE.getTypeSizeInBits(B->getType()))
2668 B = SE.getSignExtendExpr(B, A->getType());
2669 else
2670 A = SE.getSignExtendExpr(A, B->getType());
2671 }
2672 if (const SCEVConstant *D =
2673 dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
2674 const ConstantInt *C = D->getValue();
2675 // Stride of one or negative one can have reuse with non-addresses.
2676 if (C->isOne() || C->isMinusOne())
2677 goto decline_post_inc;
2678 // Avoid weird situations.
2679 if (C->getValue().getSignificantBits() >= 64 ||
2680 C->getValue().isMinSignedValue())
2681 goto decline_post_inc;
2682 // Check for possible scaled-address reuse.
2683 if (isAddressUse(TTI, UI.getUser(), UI.getOperandValToReplace())) {
2684 MemAccessTy AccessTy =
2685 getAccessType(TTI, UI.getUser(), UI.getOperandValToReplace());
2686 int64_t Scale = C->getSExtValue();
2687 if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
2688 /*BaseOffset=*/0,
2689 /*HasBaseReg=*/true, Scale,
2690 AccessTy.AddrSpace))
2691 goto decline_post_inc;
2692 Scale = -Scale;
2693 if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
2694 /*BaseOffset=*/0,
2695 /*HasBaseReg=*/true, Scale,
2696 AccessTy.AddrSpace))
2697 goto decline_post_inc;
2698 }
2699 }
2700 }
2701
2702 LLVM_DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
2703 << *Cond << '\n');
2704
2705 // It's possible for the setcc instruction to be anywhere in the loop, and
2706 // possible for it to have multiple users. If it is not immediately before
2707 // the exiting block branch, move it.
2708 if (Cond->getNextNode() != TermBr) {
2709 if (Cond->hasOneUse()) {
2710 Cond->moveBefore(TermBr->getIterator());
2711 } else {
2712 // Clone the terminating condition and insert into the loopend.
2713 ICmpInst *OldCond = Cond;
2714 Cond = cast<ICmpInst>(Cond->clone());
2715 Cond->setName(L->getHeader()->getName() + ".termcond");
2716 Cond->insertInto(ExitingBlock, TermBr->getIterator());
2717
2718 // Clone the IVUse, as the old use still exists!
2719 CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
2720 TermBr->replaceUsesOfWith(OldCond, Cond);
2721 }
2722 }
2723
2724 // If we get to here, we know that we can transform the setcc instruction to
2725 // use the post-incremented version of the IV, allowing us to coalesce the
2726 // live ranges for the IV correctly.
2727 CondUse->transformToPostInc(L);
2728 Changed = true;
2729
2730 PostIncs.insert(Cond);
2731 decline_post_inc:;
2732 }
2733
2734 // Determine an insertion point for the loop induction variable increment. It
2735 // must dominate all the post-inc comparisons we just set up, and it must
2736 // dominate the loop latch edge.
2737 IVIncInsertPos = L->getLoopLatch()->getTerminator();
2738 for (Instruction *Inst : PostIncs)
2739 IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst);
2740}
2741
2742/// Determine if the given use can accommodate a fixup at the given offset and
2743/// other details. If so, update the use and return true.
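/// For example (hypothetical numbers): if LU currently covers offsets
/// [4, 12] and a new fixup needs offset 16, the use is widened to [4, 16]
/// provided an immediate of 16 - 4 is still foldable for this kind and access
/// type; otherwise the caller must create a separate use.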
2744bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
2745 bool HasBaseReg, LSRUse::KindType Kind,
2746 MemAccessTy AccessTy) {
2747 Immediate NewMinOffset = LU.MinOffset;
2748 Immediate NewMaxOffset = LU.MaxOffset;
2749 MemAccessTy NewAccessTy = AccessTy;
2750
2751 // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
2752 // something conservative, however this can pessimize in the case that one of
2753 // the uses will have all its uses outside the loop, for example.
2754 if (LU.Kind != Kind)
2755 return false;
2756
2757 // Check for a mismatched access type, and fall back conservatively as needed.
2758 // TODO: Be less conservative when the type is similar and can use the same
2759 // addressing modes.
2760 if (Kind == LSRUse::Address) {
2761 if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2762 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
2763 AccessTy.AddrSpace);
2764 }
2765 }
2766
2767 // Conservatively assume HasBaseReg is true for now.
2768 if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
2769 if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
2770 LU.MaxOffset - NewOffset, HasBaseReg))
2771 return false;
2772 NewMinOffset = NewOffset;
2773 } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
2774 if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
2775 NewOffset - LU.MinOffset, HasBaseReg))
2776 return false;
2777 NewMaxOffset = NewOffset;
2778 }
2779
2780 // FIXME: We should be able to handle some level of scalable offset support
2781 // for 'void', but in order to get basic support up and running this is
2782 // being left out.
2783 if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
2784 (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
2785 return false;
2786
2787 // Update the use.
2788 LU.MinOffset = NewMinOffset;
2789 LU.MaxOffset = NewMaxOffset;
2790 LU.AccessTy = NewAccessTy;
2791 return true;
2792}
2793
2794/// Return an LSRUse index and an offset value for a fixup which needs the given
2795/// expression, with the given kind and optional access type. Either reuse an
2796/// existing use or create a new one, as needed.
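/// For example (hypothetical SCEV): for the expression {(4 + %base),+,1} the
/// constant 4 is peeled off as the fixup offset, letting this fixup share an
/// LSRUse with other fixups whose expressions differ only by a foldable
/// constant.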
2797std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
2798 LSRUse::KindType Kind,
2799 MemAccessTy AccessTy) {
2800 const SCEV *Copy = Expr;
2801 Immediate Offset = ExtractImmediate(Expr, SE);
2802
2803 // Basic uses can't accept any offset, for example.
2804 if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
2805 Offset, /*HasBaseReg=*/ true)) {
2806 Expr = Copy;
2807 Offset = Immediate::getFixed(0);
2808 }
2809
2810 std::pair<UseMapTy::iterator, bool> P =
2811 UseMap.try_emplace(LSRUse::SCEVUseKindPair(Expr, Kind));
2812 if (!P.second) {
2813 // A use already existed with this base.
2814 size_t LUIdx = P.first->second;
2815 LSRUse &LU = Uses[LUIdx];
2816 if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
2817 // Reuse this use.
2818 return std::make_pair(LUIdx, Offset);
2819 }
2820
2821 // Create a new use.
2822 size_t LUIdx = Uses.size();
2823 P.first->second = LUIdx;
2824 Uses.push_back(LSRUse(Kind, AccessTy));
2825 LSRUse &LU = Uses[LUIdx];
2826
2827 LU.MinOffset = Offset;
2828 LU.MaxOffset = Offset;
2829 return std::make_pair(LUIdx, Offset);
2830}
2831
2832/// Delete the given use from the Uses list.
2833void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
2834 if (&LU != &Uses.back())
2835 std::swap(LU, Uses.back());
2836 Uses.pop_back();
2837
2838 // Update RegUses.
2839 RegUses.swapAndDropUse(LUIdx, Uses.size());
2840}
2841
2842/// Look for a use distinct from OrigLU which has a formula with the same
2843/// registers as the given formula.
2844LSRUse *
2845LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
2846 const LSRUse &OrigLU) {
2847 // Search all uses for the formula. This could be more clever.
2848 for (LSRUse &LU : Uses) {
2849 // Check whether this use is close enough to OrigLU, to see whether it's
2850 // worthwhile looking through its formulae.
2851 // Ignore ICmpZero uses because they may contain formulae generated by
2852 // GenerateICmpZeroScales, in which case adding fixup offsets may
2853 // be invalid.
2854 if (&LU != &OrigLU &&
2855 LU.Kind != LSRUse::ICmpZero &&
2856 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2857 LU.WidestFixupType == OrigLU.WidestFixupType &&
2858 LU.HasFormulaWithSameRegs(OrigF)) {
2859 // Scan through this use's formulae.
2860 for (const Formula &F : LU.Formulae) {
2861 // Check to see if this formula has the same registers and symbols
2862 // as OrigF.
2863 if (F.BaseRegs == OrigF.BaseRegs &&
2864 F.ScaledReg == OrigF.ScaledReg &&
2865 F.BaseGV == OrigF.BaseGV &&
2866 F.Scale == OrigF.Scale &&
2867 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2868 if (F.BaseOffset.isZero())
2869 return &LU;
2870 // This is the formula where all the registers and symbols matched;
2871 // there aren't going to be any others. Since we declined it, we
2872 // can skip the rest of the formulae and proceed to the next LSRUse.
2873 break;
2874 }
2875 }
2876 }
2877 }
2878
2879 // Nothing looked good.
2880 return nullptr;
2881}
2882
2883void LSRInstance::CollectInterestingTypesAndFactors() {
2884 SmallSetVector<const SCEV *, 4> Strides;
2885
2886 // Collect interesting types and strides.
2887 SmallVector<const SCEV *, 4> Worklist;
2888 for (const IVStrideUse &U : IU) {
2889 const SCEV *Expr = IU.getExpr(U);
2890 if (!Expr)
2891 continue;
2892
2893 // Collect interesting types.
2894 Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
2895
2896 // Add strides for mentioned loops.
2897 Worklist.push_back(Expr);
2898 do {
2899 const SCEV *S = Worklist.pop_back_val();
2900 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
2901 if (AR->getLoop() == L)
2902 Strides.insert(AR->getStepRecurrence(SE));
2903 Worklist.push_back(AR->getStart());
2904 } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
2905 append_range(Worklist, Add->operands());
2906 }
2907 } while (!Worklist.empty());
2908 }
2909
2910 // Compute interesting factors from the set of interesting strides.
2911 for (SmallSetVector<const SCEV *, 4>::const_iterator
2912 I = Strides.begin(), E = Strides.end(); I != E; ++I)
2913 for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
2914 std::next(I); NewStrideIter != E; ++NewStrideIter) {
2915 const SCEV *OldStride = *I;
2916 const SCEV *NewStride = *NewStrideIter;
2917
2918 if (SE.getTypeSizeInBits(OldStride->getType()) !=
2919 SE.getTypeSizeInBits(NewStride->getType())) {
2920 if (SE.getTypeSizeInBits(OldStride->getType()) >
2921 SE.getTypeSizeInBits(NewStride->getType()))
2922 NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
2923 else
2924 OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
2925 }
2926 if (const SCEVConstant *Factor =
2927 dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
2928 SE, true))) {
2929 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2930 Factors.insert(Factor->getAPInt().getSExtValue());
2931 } else if (const SCEVConstant *Factor =
2932 dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
2933 NewStride,
2934 SE, true))) {
2935 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2936 Factors.insert(Factor->getAPInt().getSExtValue());
2937 }
2938 }
2939
2940 // If all uses use the same type, don't bother looking for truncation-based
2941 // reuse.
2942 if (Types.size() == 1)
2943 Types.clear();
2944
2945 LLVM_DEBUG(print_factors_and_types(dbgs()));
2946}
2947
2948/// Helper for CollectChains that finds an IV operand (computed by an AddRec in
2949/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
2950/// IVStrideUses, we could partially skip this.
2951static User::op_iterator
2952findIVOperand(User::op_iterator OI, User::op_iterator OE,
2953 Loop *L, ScalarEvolution &SE) {
2954 for(; OI != OE; ++OI) {
2955 if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
2956 if (!SE.isSCEVable(Oper->getType()))
2957 continue;
2958
2959 if (const SCEVAddRecExpr *AR =
2960 dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
2961 if (AR->getLoop() == L)
2962 break;
2963 }
2964 }
2965 }
2966 return OI;
2967}
2968
2969/// IVChain logic must consistently peek base TruncInst operands, so wrap it in
2970/// a convenient helper.
2971static Value *getWideOperand(Value *Oper) {
2972 if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
2973 return Trunc->getOperand(0);
2974 return Oper;
2975}
2976
2977/// Return an approximation of this SCEV expression's "base", or NULL for any
2978/// constant. Returning the expression itself is conservative. Returning a
2979/// deeper subexpression is more precise and valid as long as it isn't less
2980/// complex than another subexpression. For expressions involving multiple
2981/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
2982/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
2983/// IVInc==b-a.
2984///
2985/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
2986/// SCEVUnknown, we simply return the rightmost SCEV operand.
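/// For example (hypothetical expressions): the base of ((4 * %n) + %a) is %a,
/// and the base of {%a,+,%s} is likewise %a, so accesses into the same object
/// can be chained even when their scaled parts differ.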
2987static const SCEV *getExprBase(const SCEV *S) {
2988 switch (S->getSCEVType()) {
2989 default: // including scUnknown.
2990 return S;
2991 case scConstant:
2992 case scVScale:
2993 return nullptr;
2994 case scTruncate:
2995 return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
2996 case scZeroExtend:
2997 return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
2998 case scSignExtend:
2999 return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
3000 case scAddExpr: {
3001 // Skip over scaled operands (scMulExpr) to follow add operands as long as
3002 // there's nothing more complex.
3003 // FIXME: not sure if we want to recognize negation.
3004 const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
3005 for (const SCEV *SubExpr : reverse(Add->operands())) {
3006 if (SubExpr->getSCEVType() == scAddExpr)
3007 return getExprBase(SubExpr);
3008
3009 if (SubExpr->getSCEVType() != scMulExpr)
3010 return SubExpr;
3011 }
3012 return S; // all operands are scaled, be conservative.
3013 }
3014 case scAddRecExpr:
3015 return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
3016 }
3017 llvm_unreachable("Unknown SCEV kind!");
3018}
3019
3020/// Return true if the chain increment is profitable to expand into a loop
3021/// invariant value, which may require its own register. A profitable chain
3022/// increment will be an offset relative to the same base. We allow such offsets
3023/// to potentially be used as a chain increment as long as it is not obviously
3024/// expensive to expand using real instructions.
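/// Roughly (hypothetical values): a constant increment such as +16 is normally
/// acceptable, whereas a non-constant increment such as (4 * %n) is only
/// accepted if the operand is not simply a constant offset from the chain head
/// and the increment is not a high-cost expansion.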
3025bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
3026 const SCEV *IncExpr,
3027 ScalarEvolution &SE) {
3028 // Aggressively form chains when -stress-ivchain.
3029 if (StressIVChain)
3030 return true;
3031
3032 // Do not replace a constant offset from IV head with a nonconstant IV
3033 // increment.
3034 if (!isa<SCEVConstant>(IncExpr)) {
3035 const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
3036 if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
3037 return false;
3038 }
3039
3040 SmallPtrSet<const SCEV*, 8> Processed;
3041 return !isHighCostExpansion(IncExpr, Processed, SE);
3042}
3043
3044/// Return true if the number of registers needed for the chain is estimated to
3045/// be less than the number required for the individual IV users. First prohibit
3046/// any IV users that keep the IV live across increments (the Users set should
3047/// be empty). Next count the number and type of increments in the chain.
3048///
3049/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
3050/// effectively use postinc addressing modes. Only consider it profitable if the
3051/// increments can be computed in fewer registers when chained.
3052///
3053/// TODO: Consider IVInc free if it's already used in another chain.
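/// As a rough illustration of the cost model below: the chain itself starts at
/// a cost of one register; each new non-constant increment adds one, each
/// increment that repeats the previous one subtracts one, and a chain that is
/// completed by the header phi subtracts one. So a phi-completed chain whose
/// three increments all use the same loop-invariant stride nets
/// 1 - 1 + 1 - 2 = -1, which is considered profitable.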
3054static bool isProfitableChain(IVChain &Chain,
3055 SmallPtrSetImpl<Instruction *> &Users,
3056 ScalarEvolution &SE,
3057 const TargetTransformInfo &TTI) {
3058 if (StressIVChain)
3059 return true;
3060
3061 if (!Chain.hasIncs())
3062 return false;
3063
3064 if (!Users.empty()) {
3065 LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
3066 for (Instruction *Inst
3067 : Users) { dbgs() << " " << *Inst << "\n"; });
3068 return false;
3069 }
3070 assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
3071
3072 // The chain itself may require a register, so initialize cost to 1.
3073 int cost = 1;
3074
3075 // A complete chain likely eliminates the need for keeping the original IV in
3076 // a register. LSR does not currently know how to form a complete chain unless
3077 // the header phi already exists.
3078 if (isa<PHINode>(Chain.tailUserInst())
3079 && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
3080 --cost;
3081 }
3082 const SCEV *LastIncExpr = nullptr;
3083 unsigned NumConstIncrements = 0;
3084 unsigned NumVarIncrements = 0;
3085 unsigned NumReusedIncrements = 0;
3086
3087 if (TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))
3088 return true;
3089
3090 for (const IVInc &Inc : Chain) {
3091 if (TTI.isProfitableLSRChainElement(Inc.UserInst))
3092 return true;
3093 if (Inc.IncExpr->isZero())
3094 continue;
3095
3096 // Incrementing by zero or some constant is neutral. We assume constants can
3097 // be folded into an addressing mode or an add's immediate operand.
3098 if (isa<SCEVConstant>(Inc.IncExpr)) {
3099 ++NumConstIncrements;
3100 continue;
3101 }
3102
3103 if (Inc.IncExpr == LastIncExpr)
3104 ++NumReusedIncrements;
3105 else
3106 ++NumVarIncrements;
3107
3108 LastIncExpr = Inc.IncExpr;
3109 }
3110 // An IV chain with a single increment is handled by LSR's postinc
3111 // uses. However, a chain with multiple increments requires keeping the IV's
3112 // value live longer than it needs to be if chained.
3113 if (NumConstIncrements > 1)
3114 --cost;
3115
3116 // Materializing increment expressions in the preheader that didn't exist in
3117 // the original code may cost a register. For example, sign-extended array
3118 // indices can produce ridiculous increments like this:
3119 // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
3120 cost += NumVarIncrements;
3121
3122 // Reusing variable increments likely saves a register to hold the multiple of
3123 // the stride.
3124 cost -= NumReusedIncrements;
3125
3126 LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
3127 << "\n");
3128
3129 return cost < 0;
3130}
3131
3132/// Add this IV user to an existing chain or make it the head of a new chain.
3133void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
3134 SmallVectorImpl<ChainUsers> &ChainUsersVec) {
3135 // When IVs are used as types of varying widths, they are generally converted
3136 // to a wider type with some uses remaining narrow under a (free) trunc.
3137 Value *const NextIV = getWideOperand(IVOper);
3138 const SCEV *const OperExpr = SE.getSCEV(NextIV);
3139 const SCEV *const OperExprBase = getExprBase(OperExpr);
3140
3141 // Visit all existing chains. Check if its IVOper can be computed as a
3142 // profitable loop invariant increment from the last link in the Chain.
3143 unsigned ChainIdx = 0, NChains = IVChainVec.size();
3144 const SCEV *LastIncExpr = nullptr;
3145 for (; ChainIdx < NChains; ++ChainIdx) {
3146 IVChain &Chain = IVChainVec[ChainIdx];
3147
3148 // Prune the solution space aggressively by checking that both IV operands
3149 // are expressions that operate on the same unscaled SCEVUnknown. This
3150 // "base" will be canceled by the subsequent getMinusSCEV call. Checking
3151 // first avoids creating extra SCEV expressions.
3152 if (!StressIVChain && Chain.ExprBase != OperExprBase)
3153 continue;
3154
3155 Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
3156 if (PrevIV->getType() != NextIV->getType())
3157 continue;
3158
3159 // A phi node terminates a chain.
3160 if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
3161 continue;
3162
3163 // The increment must be loop-invariant so it can be kept in a register.
3164 const SCEV *PrevExpr = SE.getSCEV(PrevIV);
3165 const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
3166 if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.isLoopInvariant(IncExpr, L))
3167 continue;
3168
3169 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
3170 LastIncExpr = IncExpr;
3171 break;
3172 }
3173 }
3174 // If we haven't found a chain, create a new one, unless we hit the max. Don't
3175 // bother for phi nodes, because they must be last in the chain.
3176 if (ChainIdx == NChains) {
3177 if (isa<PHINode>(UserInst))
3178 return;
3179 if (NChains >= MaxChains && !StressIVChain) {
3180 LLVM_DEBUG(dbgs() << "IV Chain Limit\n");
3181 return;
3182 }
3183 LastIncExpr = OperExpr;
3184 // IVUsers may have skipped over sign/zero extensions. We don't currently
3185 // attempt to form chains involving extensions unless they can be hoisted
3186 // into this loop's AddRec.
3187 if (!isa<SCEVAddRecExpr>(LastIncExpr))
3188 return;
3189 ++NChains;
3190 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
3191 OperExprBase));
3192 ChainUsersVec.resize(NChains);
3193 LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
3194 << ") IV=" << *LastIncExpr << "\n");
3195 } else {
3196 LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
3197 << ") IV+" << *LastIncExpr << "\n");
3198 // Add this IV user to the end of the chain.
3199 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
3200 }
3201 IVChain &Chain = IVChainVec[ChainIdx];
3202
3203 SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
3204 // This chain's NearUsers become FarUsers.
3205 if (!LastIncExpr->isZero()) {
3206 ChainUsersVec[ChainIdx].FarUsers.insert_range(NearUsers);
3207 NearUsers.clear();
3208 }
3209
3210 // All other uses of IVOperand become near uses of the chain.
 3211   // We currently ignore intermediate values within SCEV expressions, assuming
 3212   // they will eventually be used by the current chain, or can be computed
 3213   // from one of the chain increments. To be more precise we could
 3214   // transitively follow its users and only add leaf IV users to the set.
3215 for (User *U : IVOper->users()) {
3216 Instruction *OtherUse = dyn_cast<Instruction>(U);
3217 if (!OtherUse)
3218 continue;
3219 // Uses in the chain will no longer be uses if the chain is formed.
3220 // Include the head of the chain in this iteration (not Chain.begin()).
3221 IVChain::const_iterator IncIter = Chain.Incs.begin();
3222 IVChain::const_iterator IncEnd = Chain.Incs.end();
 3223     for (; IncIter != IncEnd; ++IncIter) {
3224 if (IncIter->UserInst == OtherUse)
3225 break;
3226 }
3227 if (IncIter != IncEnd)
3228 continue;
3229
3230 if (SE.isSCEVable(OtherUse->getType())
3231 && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
3232 && IU.isIVUserOrOperand(OtherUse)) {
3233 continue;
3234 }
3235 NearUsers.insert(OtherUse);
3236 }
3237
3238 // Since this user is part of the chain, it's no longer considered a use
3239 // of the chain.
3240 ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
3241}
3242
3243/// Populate the vector of Chains.
3244///
3245/// This decreases ILP at the architecture level. Targets with ample registers,
3246/// multiple memory ports, and no register renaming probably don't want
3247/// this. However, such targets should probably disable LSR altogether.
3248///
3249/// The job of LSR is to make a reasonable choice of induction variables across
3250/// the loop. Subsequent passes can easily "unchain" computation exposing more
3251/// ILP *within the loop* if the target wants it.
3252///
3253/// Finding the best IV chain is potentially a scheduling problem. Since LSR
3254/// will not reorder memory operations, it will recognize this as a chain, but
3255/// will generate redundant IV increments. Ideally this would be corrected later
3256/// by a smart scheduler:
3257/// = A[i]
3258/// = A[i+x]
3259/// A[i] =
3260/// A[i+x] =
3261///
3262/// TODO: Walk the entire domtree within this loop, not just the path to the
3263/// loop latch. This will discover chains on side paths, but requires
3264/// maintaining multiple copies of the Chains state.
3265void LSRInstance::CollectChains() {
3266 LLVM_DEBUG(dbgs() << "Collecting IV Chains.\n");
3267 SmallVector<ChainUsers, 8> ChainUsersVec;
3268
 3269   SmallVector<BasicBlock *, 8> LatchPath;
 3270   BasicBlock *LoopHeader = L->getHeader();
3271 for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
3272 Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3273 LatchPath.push_back(Rung->getBlock());
3274 }
3275 LatchPath.push_back(LoopHeader);
3276
3277 // Walk the instruction stream from the loop header to the loop latch.
3278 for (BasicBlock *BB : reverse(LatchPath)) {
3279 for (Instruction &I : *BB) {
3280 // Skip instructions that weren't seen by IVUsers analysis.
3281 if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))
3282 continue;
3283
3284 // Ignore users that are part of a SCEV expression. This way we only
3285 // consider leaf IV Users. This effectively rediscovers a portion of
3286 // IVUsers analysis but in program order this time.
3287 if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))
3288 continue;
3289
3290 // Remove this instruction from any NearUsers set it may be in.
3291 for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
3292 ChainIdx < NChains; ++ChainIdx) {
3293 ChainUsersVec[ChainIdx].NearUsers.erase(&I);
3294 }
3295 // Search for operands that can be chained.
3296 SmallPtrSet<Instruction*, 4> UniqueOperands;
3297 User::op_iterator IVOpEnd = I.op_end();
3298 User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE);
3299 while (IVOpIter != IVOpEnd) {
3300 Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
3301 if (UniqueOperands.insert(IVOpInst).second)
3302 ChainInstruction(&I, IVOpInst, ChainUsersVec);
3303 IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3304 }
3305 } // Continue walking down the instructions.
3306 } // Continue walking down the domtree.
3307 // Visit phi backedges to determine if the chain can generate the IV postinc.
3308 for (PHINode &PN : L->getHeader()->phis()) {
3309 if (!SE.isSCEVable(PN.getType()))
3310 continue;
3311
3312 Instruction *IncV =
3313 dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
3314 if (IncV)
3315 ChainInstruction(&PN, IncV, ChainUsersVec);
3316 }
3317 // Remove any unprofitable chains.
3318 unsigned ChainIdx = 0;
3319 for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
3320 UsersIdx < NChains; ++UsersIdx) {
3321 if (!isProfitableChain(IVChainVec[UsersIdx],
3322 ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
3323 continue;
 3324     // Preserve the chain at UsersIdx.
3325 if (ChainIdx != UsersIdx)
3326 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3327 FinalizeChain(IVChainVec[ChainIdx]);
3328 ++ChainIdx;
3329 }
3330 IVChainVec.resize(ChainIdx);
3331}
3332
3333void LSRInstance::FinalizeChain(IVChain &Chain) {
3334 assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
3335 LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
3336
3337 for (const IVInc &Inc : Chain) {
3338 LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
3339 auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
3340 assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
3341 IVIncSet.insert(UseI);
3342 }
3343}
3344
3345/// Return true if the IVInc can be folded into an addressing mode.
3346static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
3347 Value *Operand, const TargetTransformInfo &TTI) {
3348 const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
3349 Immediate IncOffset = Immediate::getZero();
3350 if (IncConst) {
 3351     if (IncConst->getAPInt().getSignificantBits() > 64)
3352 return false;
3353 IncOffset = Immediate::getFixed(IncConst->getValue()->getSExtValue());
3354 } else {
3355 // Look for mul(vscale, constant), to detect a scalable offset.
3356 const APInt *C;
3357 if (!match(IncExpr, m_scev_Mul(m_scev_APInt(C), m_SCEVVScale())) ||
3358 C->getSignificantBits() > 64)
3359 return false;
3360 IncOffset = Immediate::getScalable(C->getSExtValue());
3361 }
3362
3363 if (!isAddressUse(TTI, UserInst, Operand))
3364 return false;
3365
3366 MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
3367 if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
3368 IncOffset, /*HasBaseReg=*/false))
3369 return false;
3370
3371 return true;
3372}
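// For illustration (offsets are hypothetical): a chained load whose IncExpr
// is the constant 16 yields Immediate::getFixed(16), which folds if the
// target accepts reg+16 addressing for this access; an IncExpr of
// (4 * vscale) yields Immediate::getScalable(4), covering scalable-vector
// strides.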
3373
3374/// Generate an add or subtract for each IVInc in a chain to materialize the IV
3375/// user's operand from the previous IV user's operand.
3376void LSRInstance::GenerateIVChain(const IVChain &Chain,
 3377                                   SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
 3378   // Find the new IVOperand for the head of the chain. It may have been replaced
3379 // by LSR.
3380 const IVInc &Head = Chain.Incs[0];
3381 User::op_iterator IVOpEnd = Head.UserInst->op_end();
3382 // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
3383 User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
3384 IVOpEnd, L, SE);
3385 Value *IVSrc = nullptr;
3386 while (IVOpIter != IVOpEnd) {
3387 IVSrc = getWideOperand(*IVOpIter);
3388
3389 // If this operand computes the expression that the chain needs, we may use
3390 // it. (Check this after setting IVSrc which is used below.)
3391 //
3392 // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
3393 // narrow for the chain, so we can no longer use it. We do allow using a
3394 // wider phi, assuming the LSR checked for free truncation. In that case we
3395 // should already have a truncate on this operand such that
3396 // getSCEV(IVSrc) == IncExpr.
3397 if (SE.getSCEV(*IVOpIter) == Head.IncExpr
3398 || SE.getSCEV(IVSrc) == Head.IncExpr) {
3399 break;
3400 }
3401 IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3402 }
3403 if (IVOpIter == IVOpEnd) {
3404 // Gracefully give up on this chain.
3405 LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
3406 return;
3407 }
3408 assert(IVSrc && "Failed to find IV chain source");
3409
3410 LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
3411 Type *IVTy = IVSrc->getType();
3412 Type *IntTy = SE.getEffectiveSCEVType(IVTy);
3413 const SCEV *LeftOverExpr = nullptr;
3414 const SCEV *Accum = SE.getZero(IntTy);
 3415   SmallVector<std::pair<const SCEV *, Value *>> Bases;
 3416   Bases.emplace_back(Accum, IVSrc);
3417
3418 for (const IVInc &Inc : Chain) {
3419 Instruction *InsertPt = Inc.UserInst;
3420 if (isa<PHINode>(InsertPt))
3421 InsertPt = L->getLoopLatch()->getTerminator();
3422
3423 // IVOper will replace the current IV User's operand. IVSrc is the IV
3424 // value currently held in a register.
3425 Value *IVOper = IVSrc;
3426 if (!Inc.IncExpr->isZero()) {
3427 // IncExpr was the result of subtraction of two narrow values, so must
3428 // be signed.
3429 const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy);
3430 Accum = SE.getAddExpr(Accum, IncExpr);
3431 LeftOverExpr = LeftOverExpr ?
3432 SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3433 }
3434
3435 // Look through each base to see if any can produce a nice addressing mode.
3436 bool FoundBase = false;
3437 for (auto [MapScev, MapIVOper] : reverse(Bases)) {
3438 const SCEV *Remainder = SE.getMinusSCEV(Accum, MapScev);
3439 if (canFoldIVIncExpr(Remainder, Inc.UserInst, Inc.IVOperand, TTI)) {
3440 if (!Remainder->isZero()) {
3441 Rewriter.clearPostInc();
3442 Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
3443 const SCEV *IVOperExpr =
3444 SE.getAddExpr(SE.getUnknown(MapIVOper), SE.getUnknown(IncV));
3445 IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3446 } else {
3447 IVOper = MapIVOper;
3448 }
3449
3450 FoundBase = true;
3451 break;
3452 }
3453 }
3454 if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {
3455 // Expand the IV increment.
3456 Rewriter.clearPostInc();
3457 Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3458 const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
3459 SE.getUnknown(IncV));
3460 IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3461
3462 // If an IV increment can't be folded, use it as the next IV value.
3463 if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {
3464 assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
3465 Bases.emplace_back(Accum, IVOper);
3466 IVSrc = IVOper;
3467 LeftOverExpr = nullptr;
3468 }
3469 }
3470 Type *OperTy = Inc.IVOperand->getType();
3471 if (IVTy != OperTy) {
3472 assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
3473 "cannot extend a chained IV");
3474 IRBuilder<> Builder(InsertPt);
3475 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
3476 }
3477 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3478 if (auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))
3479 DeadInsts.emplace_back(OperandIsInstr);
3480 }
3481 // If LSR created a new, wider phi, we may also replace its postinc. We only
3482 // do this if we also found a wide value for the head of the chain.
3483 if (isa<PHINode>(Chain.tailUserInst())) {
3484 for (PHINode &Phi : L->getHeader()->phis()) {
3485 if (Phi.getType() != IVSrc->getType())
3486 continue;
3487 Instruction *PostIncV = dyn_cast<Instruction>(
3488 Phi.getIncomingValueForBlock(L->getLoopLatch()));
3489 if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
3490 continue;
3491 Value *IVOper = IVSrc;
3492 Type *PostIncTy = PostIncV->getType();
3493 if (IVTy != PostIncTy) {
3494 assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
3495 IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
3496 Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
3497 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
3498 }
3499 Phi.replaceUsesOfWith(PostIncV, IVOper);
3500 DeadInsts.emplace_back(PostIncV);
3501 }
3502 }
3503}
3504
3505void LSRInstance::CollectFixupsAndInitialFormulae() {
3506 BranchInst *ExitBranch = nullptr;
3507 bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3508
3509 // For calculating baseline cost
 3510   SmallPtrSet<const SCEV *, 16> Regs;
 3511   DenseSet<const SCEV *> VisitedRegs;
3512 DenseSet<size_t> VisitedLSRUse;
3513
3514 for (const IVStrideUse &U : IU) {
3515 Instruction *UserInst = U.getUser();
3516 // Skip IV users that are part of profitable IV Chains.
3517 User::op_iterator UseI =
3518 find(UserInst->operands(), U.getOperandValToReplace());
3519 assert(UseI != UserInst->op_end() && "cannot find IV operand");
3520 if (IVIncSet.count(UseI)) {
3521 LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');
3522 continue;
3523 }
3524
3525 LSRUse::KindType Kind = LSRUse::Basic;
3526 MemAccessTy AccessTy;
3527 if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {
3528 Kind = LSRUse::Address;
3529 AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace());
3530 }
3531
3532 const SCEV *S = IU.getExpr(U);
3533 if (!S)
3534 continue;
3535 PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();
3536
3537 // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
3538 // (N - i == 0), and this allows (N - i) to be the expression that we work
3539 // with rather than just N or i, so we can consider the register
3540 // requirements for both N and i at the same time. Limiting this code to
3541 // equality icmps is not a problem because all interesting loops use
3542 // equality icmps, thanks to IndVarSimplify.
3543 if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
 3544       // If CI can be saved in some target (e.g., replaced by a hardware loop
 3545       // on PowerPC), there is no need to generate initial formulae for it.
3546 if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
3547 continue;
3548 if (CI->isEquality()) {
3549 // Swap the operands if needed to put the OperandValToReplace on the
3550 // left, for consistency.
3551 Value *NV = CI->getOperand(1);
3552 if (NV == U.getOperandValToReplace()) {
3553 CI->setOperand(1, CI->getOperand(0));
3554 CI->setOperand(0, NV);
3555 NV = CI->getOperand(1);
3556 Changed = true;
3557 }
3558
3559 // x == y --> x - y == 0
3560 const SCEV *N = SE.getSCEV(NV);
3561 if (SE.isLoopInvariant(N, L) && Rewriter.isSafeToExpand(N) &&
3562 (!NV->getType()->isPointerTy() ||
3563 SE.getPointerBase(N) == SE.getPointerBase(S))) {
3564 // S is normalized, so normalize N before folding it into S
3565 // to keep the result normalized.
3566 N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
3567 if (!N)
3568 continue;
3569 Kind = LSRUse::ICmpZero;
3570 S = SE.getMinusSCEV(N, S);
3571 } else if (L->isLoopInvariant(NV) &&
3572 (!isa<Instruction>(NV) ||
3573 DT.dominates(cast<Instruction>(NV), L->getHeader())) &&
3574 !NV->getType()->isPointerTy()) {
3575 // If we can't generally expand the expression (e.g. it contains
3576 // a divide), but it is already at a loop invariant point before the
3577 // loop, wrap it in an unknown (to prevent the expander from trying
3578 // to re-expand in a potentially unsafe way.) The restriction to
3579 // integer types is required because the unknown hides the base, and
3580 // SCEV can't compute the difference of two unknown pointers.
3581 N = SE.getUnknown(NV);
3582 N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
3583 if (!N)
3584 continue;
3585 Kind = LSRUse::ICmpZero;
3586 S = SE.getMinusSCEV(N, S);
3587 assert(!isa<SCEVCouldNotCompute>(S));
3588 }
3589
3590 // -1 and the negations of all interesting strides (except the negation
3591 // of -1) are now also interesting.
3592 for (size_t i = 0, e = Factors.size(); i != e; ++i)
3593 if (Factors[i] != -1)
3594 Factors.insert(-(uint64_t)Factors[i]);
3595 Factors.insert(-1);
3596 }
3597 }
3598
3599 // Get or create an LSRUse.
3600 std::pair<size_t, Immediate> P = getUse(S, Kind, AccessTy);
3601 size_t LUIdx = P.first;
3602 Immediate Offset = P.second;
3603 LSRUse &LU = Uses[LUIdx];
3604
3605 // Record the fixup.
3606 LSRFixup &LF = LU.getNewFixup();
3607 LF.UserInst = UserInst;
3608 LF.OperandValToReplace = U.getOperandValToReplace();
3609 LF.PostIncLoops = TmpPostIncLoops;
3610 LF.Offset = Offset;
3611 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3612
3613 // Create SCEV as Formula for calculating baseline cost
3614 if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3615 Formula F;
3616 F.initialMatch(S, L, SE);
3617 BaselineCost.RateFormula(F, Regs, VisitedRegs, LU,
3618 HardwareLoopProfitable);
3619 VisitedLSRUse.insert(LUIdx);
3620 }
3621
3622 if (!LU.WidestFixupType ||
3623 SE.getTypeSizeInBits(LU.WidestFixupType) <
3624 SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
3625 LU.WidestFixupType = LF.OperandValToReplace->getType();
3626
3627 // If this is the first use of this LSRUse, give it a formula.
3628 if (LU.Formulae.empty()) {
3629 InsertInitialFormula(S, LU, LUIdx);
3630 CountRegisters(LU.Formulae.back(), LUIdx);
3631 }
3632 }
3633
3634 LLVM_DEBUG(print_fixups(dbgs()));
3635}
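// As a concrete sketch of the ICmpZero rewrite above: for
//   %c = icmp eq i64 %i.next, %n
// where %i.next has SCEV {1,+,1}<%L> and %n is loop-invariant, the use is
// recorded as an ICmpZero use of (%n - {1,+,1}<%L>), so the register needs of
// %n and the induction variable are costed together.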
3636
3637/// Insert a formula for the given expression into the given use, separating out
3638/// loop-variant portions from loop-invariant and loop-computable portions.
3639void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
3640 size_t LUIdx) {
3641 // Mark uses whose expressions cannot be expanded.
3642 if (!Rewriter.isSafeToExpand(S))
3643 LU.RigidFormula = true;
3644
3645 Formula F;
3646 F.initialMatch(S, L, SE);
3647 bool Inserted = InsertFormula(LU, LUIdx, F);
3648 assert(Inserted && "Initial formula already exists!"); (void)Inserted;
3649}
3650
3651/// Insert a simple single-register formula for the given expression into the
3652/// given use.
3653void
3654LSRInstance::InsertSupplementalFormula(const SCEV *S,
3655 LSRUse &LU, size_t LUIdx) {
3656 Formula F;
3657 F.BaseRegs.push_back(S);
3658 F.HasBaseReg = true;
3659 bool Inserted = InsertFormula(LU, LUIdx, F);
3660 assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
3661}
3662
3663/// Note which registers are used by the given formula, updating RegUses.
3664void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
3665 if (F.ScaledReg)
3666 RegUses.countRegister(F.ScaledReg, LUIdx);
3667 for (const SCEV *BaseReg : F.BaseRegs)
3668 RegUses.countRegister(BaseReg, LUIdx);
3669}
3670
3671/// If the given formula has not yet been inserted, add it to the list, and
3672/// return true. Return false otherwise.
3673bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
3674 // Do not insert formula that we will not be able to expand.
3675 assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
3676 "Formula is illegal");
3677
3678 if (!LU.InsertFormula(F, *L))
3679 return false;
3680
3681 CountRegisters(F, LUIdx);
3682 return true;
3683}
3684
3685/// Check for other uses of loop-invariant values which we're tracking. These
3686/// other uses will pin these values in registers, making them less profitable
3687/// for elimination.
3688/// TODO: This currently misses non-constant addrec step registers.
3689/// TODO: Should this give more weight to users inside the loop?
3690void
3691LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3692 SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
 3693   SmallPtrSet<const SCEV *, 32> Visited;
 3694
3695 // Don't collect outside uses if we are favoring postinc - the instructions in
3696 // the loop are more important than the ones outside of it.
3697 if (AMK == TTI::AMK_PostIndexed)
3698 return;
3699
3700 while (!Worklist.empty()) {
3701 const SCEV *S = Worklist.pop_back_val();
3702
3703 // Don't process the same SCEV twice
3704 if (!Visited.insert(S).second)
3705 continue;
3706
3707 if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
3708 append_range(Worklist, N->operands());
3709 else if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(S))
3710 Worklist.push_back(C->getOperand());
3711 else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
3712 Worklist.push_back(D->getLHS());
3713 Worklist.push_back(D->getRHS());
3714 } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
3715 const Value *V = US->getValue();
3716 if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
3717 // Look for instructions defined outside the loop.
3718 if (L->contains(Inst)) continue;
3719 } else if (isa<Constant>(V))
3720 // Constants can be re-materialized.
3721 continue;
3722 for (const Use &U : V->uses()) {
3723 const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
3724 // Ignore non-instructions.
3725 if (!UserInst)
3726 continue;
3727 // Don't bother if the instruction is an EHPad.
3728 if (UserInst->isEHPad())
3729 continue;
3730 // Ignore instructions in other functions (as can happen with
3731 // Constants).
3732 if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
3733 continue;
3734 // Ignore instructions not dominated by the loop.
3735 const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3736 UserInst->getParent() :
3737 cast<PHINode>(UserInst)->getIncomingBlock(
 3738             PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
 3739         if (!DT.dominates(L->getHeader(), UseBB))
3740 continue;
3741 // Don't bother if the instruction is in a BB which ends in an EHPad.
3742 if (UseBB->getTerminator()->isEHPad())
3743 continue;
3744
3745 // Ignore cases in which the currently-examined value could come from
3746 // a basic block terminated with an EHPad. This checks all incoming
3747 // blocks of the phi node since it is possible that the same incoming
3748 // value comes from multiple basic blocks, only some of which may end
3749 // in an EHPad. If any of them do, a subsequent rewrite attempt by this
3750 // pass would try to insert instructions into an EHPad, hitting an
3751 // assertion.
3752 if (isa<PHINode>(UserInst)) {
3753 const auto *PhiNode = cast<PHINode>(UserInst);
3754 bool HasIncompatibleEHPTerminatedBlock = false;
3755 llvm::Value *ExpectedValue = U;
3756 for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {
3757 if (PhiNode->getIncomingValue(I) == ExpectedValue) {
3758 if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {
3759 HasIncompatibleEHPTerminatedBlock = true;
3760 break;
3761 }
3762 }
3763 }
3764 if (HasIncompatibleEHPTerminatedBlock) {
3765 continue;
3766 }
3767 }
3768
3769 // Don't bother rewriting PHIs in catchswitch blocks.
3770 if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator()))
3771 continue;
3772 // Ignore uses which are part of other SCEV expressions, to avoid
3773 // analyzing them multiple times.
3774 if (SE.isSCEVable(UserInst->getType())) {
3775 const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
3776 // If the user is a no-op, look through to its uses.
3777 if (!isa<SCEVUnknown>(UserS))
3778 continue;
3779 if (UserS == US) {
3780 Worklist.push_back(
3781 SE.getUnknown(const_cast<Instruction *>(UserInst)));
3782 continue;
3783 }
3784 }
3785 // Ignore icmp instructions which are already being analyzed.
3786 if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
3787 unsigned OtherIdx = !U.getOperandNo();
3788 Value *OtherOp = ICI->getOperand(OtherIdx);
3789 if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
3790 continue;
3791 }
3792
3793 // Do not consider uses inside lifetime intrinsics. These are not
3794 // actually materialized.
3795 if (UserInst->isLifetimeStartOrEnd())
3796 continue;
3797
3798 std::pair<size_t, Immediate> P =
3799 getUse(S, LSRUse::Basic, MemAccessTy());
3800 size_t LUIdx = P.first;
3801 Immediate Offset = P.second;
3802 LSRUse &LU = Uses[LUIdx];
3803 LSRFixup &LF = LU.getNewFixup();
3804 LF.UserInst = const_cast<Instruction *>(UserInst);
3805 LF.OperandValToReplace = U;
3806 LF.Offset = Offset;
3807 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3808 if (!LU.WidestFixupType ||
3809 SE.getTypeSizeInBits(LU.WidestFixupType) <
3810 SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
3811 LU.WidestFixupType = LF.OperandValToReplace->getType();
3812 InsertSupplementalFormula(US, LU, LUIdx);
3813 CountRegisters(LU.Formulae.back(), Uses.size() - 1);
3814 break;
3815 }
3816 }
3817 }
3818}
3819
3820/// Split S into subexpressions which can be pulled out into separate
3821/// registers. If C is non-null, multiply each subexpression by C.
3822///
3823/// Return remainder expression after factoring the subexpressions captured by
3824/// Ops. If Ops is complete, return NULL.
3825static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
 3826                                    SmallVectorImpl<const SCEV *> &Ops,
 3827                                    const Loop *L,
3828 ScalarEvolution &SE,
3829 unsigned Depth = 0) {
3830 // Arbitrarily cap recursion to protect compile time.
3831 if (Depth >= 3)
3832 return S;
3833
3834 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
3835 // Break out add operands.
3836 for (const SCEV *S : Add->operands()) {
3837 const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth+1);
3838 if (Remainder)
3839 Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
3840 }
3841 return nullptr;
3842 }
3843 const SCEV *Start, *Step;
3844 const SCEVConstant *Op0;
3845 const SCEV *Op1;
3846 if (match(S, m_scev_AffineAddRec(m_SCEV(Start), m_SCEV(Step)))) {
3847 // Split a non-zero base out of an addrec.
3848 if (Start->isZero())
3849 return S;
3850
3851 const SCEV *Remainder = CollectSubexprs(Start, C, Ops, L, SE, Depth + 1);
3852 // Split the non-zero AddRec unless it is part of a nested recurrence that
3853 // does not pertain to this loop.
3854 if (Remainder && (cast<SCEVAddRecExpr>(S)->getLoop() == L ||
3855 !isa<SCEVAddRecExpr>(Remainder))) {
3856 Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
3857 Remainder = nullptr;
3858 }
3859 if (Remainder != Start) {
3860 if (!Remainder)
3861 Remainder = SE.getConstant(S->getType(), 0);
3862 return SE.getAddRecExpr(Remainder, Step,
3863 cast<SCEVAddRecExpr>(S)->getLoop(),
3864 // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
 3865                               SCEV::FlagAnyWrap);
 3866     }
3867 } else if (match(S, m_scev_Mul(m_SCEVConstant(Op0), m_SCEV(Op1)))) {
3868 // Break (C * (a + b + c)) into C*a + C*b + C*c.
3869 C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
3870 const SCEV *Remainder = CollectSubexprs(Op1, C, Ops, L, SE, Depth + 1);
3871 if (Remainder)
3872 Ops.push_back(SE.getMulExpr(C, Remainder));
3873 return nullptr;
3874 }
3875 return S;
3876}
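// For example, given S = {(%a + 4),+,8}<%L> with C == nullptr, the operands
// %a and 4 of the start value are pushed onto Ops as separate subexpressions
// and the recurrence {0,+,8}<%L> is returned as the remainder.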
3877
3878/// Return true if the SCEV represents a value that may end up as a
3879/// post-increment operation.
 3880 static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
 3881                               LSRUse &LU, const SCEV *S, const Loop *L,
3882 ScalarEvolution &SE) {
3883 if (LU.Kind != LSRUse::Address ||
3884 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3885 return false;
3886 const SCEV *Start;
3887 if (!match(S, m_scev_AffineAddRec(m_SCEV(Start), m_SCEVConstant())))
3888 return false;
3889 // Check if a post-indexed load/store can be used.
 3890   if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, S->getType()) ||
 3891       TTI.isIndexedStoreLegal(TTI.MIM_PostInc, S->getType())) {
 3892     if (!isa<SCEVConstant>(Start) && SE.isLoopInvariant(Start, L))
3893 return true;
3894 }
3895 return false;
3896}
3897
3898/// Helper function for LSRInstance::GenerateReassociations.
3899void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
3900 const Formula &Base,
3901 unsigned Depth, size_t Idx,
3902 bool IsScaledReg) {
3903 const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3904 // Don't generate reassociations for the base register of a value that
3905 // may generate a post-increment operator. The reason is that the
 3906   // reassociations cause extra base+register formulae to be created,
3907 // and possibly chosen, but the post-increment is more efficient.
3908 if (AMK == TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, BaseReg, L, SE))
3909 return;
 3910   SmallVector<const SCEV *, 8> AddOps;
 3911   const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
3912 if (Remainder)
3913 AddOps.push_back(Remainder);
3914
3915 if (AddOps.size() == 1)
3916 return;
3917
 3918   for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
 3919                                                      JE = AddOps.end();
3920 J != JE; ++J) {
3921 // Loop-variant "unknown" values are uninteresting; we won't be able to
3922 // do anything meaningful with them.
3923 if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
3924 continue;
3925
3926 // Don't pull a constant into a register if the constant could be folded
3927 // into an immediate field.
3928 if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3929 LU.AccessTy, *J, Base.getNumRegs() > 1))
3930 continue;
3931
3932 // Collect all operands except *J.
3933 SmallVector<const SCEV *, 8> InnerAddOps(std::as_const(AddOps).begin(), J);
3934 InnerAddOps.append(std::next(J), std::as_const(AddOps).end());
3935
3936 // Don't leave just a constant behind in a register if the constant could
3937 // be folded into an immediate field.
3938 if (InnerAddOps.size() == 1 &&
3939 isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3940 LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
3941 continue;
3942
3943 const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
3944 if (InnerSum->isZero())
3945 continue;
3946 Formula F = Base;
3947
3948 if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())
3949 continue;
3950
3951 // Add the remaining pieces of the add back into the new formula.
3952 const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
3953 if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
3954 TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset.getFixedValue() +
3955 InnerSumSC->getValue()->getZExtValue())) {
3956 F.UnfoldedOffset =
3957 Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
3958 InnerSumSC->getValue()->getZExtValue());
3959 if (IsScaledReg) {
3960 F.ScaledReg = nullptr;
3961 F.Scale = 0;
3962 } else
3963 F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
3964 } else if (IsScaledReg)
3965 F.ScaledReg = InnerSum;
3966 else
3967 F.BaseRegs[Idx] = InnerSum;
3968
3969 // Add J as its own register, or an unfolded immediate.
3970 const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
3971 if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
3972 TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset.getFixedValue() +
3973 SC->getValue()->getZExtValue()))
3974 F.UnfoldedOffset =
3975 Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
3976 SC->getValue()->getZExtValue());
3977 else
3978 F.BaseRegs.push_back(*J);
 3979     // We may have changed the number of registers in the base regs; adjust
 3980     // the formula accordingly.
3981 F.canonicalize(*L);
3982
3983 if (InsertFormula(LU, LUIdx, F))
3984 // If that formula hadn't been seen before, recurse to find more like
3985 // it.
 3986       // Add a term based on Log16(AddOps.size()), i.e. Log2_32(AddOps.size()) >> 2,
 3987       // because Depth alone is not enough to bound compile time.
 3988       // This means that every time AddOps.size() exceeds 16^x we add x to
 3989       // Depth.
3990 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
3991 Depth + 1 + (Log2_32(AddOps.size()) >> 2));
3992 }
3993}
3994
3995/// Split out subexpressions from adds and the bases of addrecs.
3996void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
3997 Formula Base, unsigned Depth) {
3998 assert(Base.isCanonical(*L) && "Input must be in the canonical form");
3999 // Arbitrarily cap recursion to protect compile time.
4000 if (Depth >= 3)
4001 return;
4002
4003 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
4004 GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
4005
4006 if (Base.Scale == 1)
4007 GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
4008 /* Idx */ -1, /* IsScaledReg */ true);
4009}
4010
4011/// Generate a formula consisting of all of the loop-dominating registers added
4012/// into a single register.
4013void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
4014 Formula Base) {
4015 // This method is only interesting on a plurality of registers.
4016 if (Base.BaseRegs.size() + (Base.Scale == 1) +
4017 (Base.UnfoldedOffset.isNonZero()) <=
4018 1)
4019 return;
4020
4021 // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
4022 // processing the formula.
4023 Base.unscale();
 4024   SmallVector<const SCEV *, 4> Ops;
 4025   Formula NewBase = Base;
4026 NewBase.BaseRegs.clear();
4027 Type *CombinedIntegerType = nullptr;
4028 for (const SCEV *BaseReg : Base.BaseRegs) {
4029 if (SE.properlyDominates(BaseReg, L->getHeader()) &&
4030 !SE.hasComputableLoopEvolution(BaseReg, L)) {
4031 if (!CombinedIntegerType)
4032 CombinedIntegerType = SE.getEffectiveSCEVType(BaseReg->getType());
4033 Ops.push_back(BaseReg);
4034 }
4035 else
4036 NewBase.BaseRegs.push_back(BaseReg);
4037 }
4038
4039 // If no register is relevant, we're done.
4040 if (Ops.size() == 0)
4041 return;
4042
4043 // Utility function for generating the required variants of the combined
4044 // registers.
4045 auto GenerateFormula = [&](const SCEV *Sum) {
4046 Formula F = NewBase;
4047
4048 // TODO: If Sum is zero, it probably means ScalarEvolution missed an
4049 // opportunity to fold something. For now, just ignore such cases
4050 // rather than proceed with zero in a register.
4051 if (Sum->isZero())
4052 return;
4053
4054 F.BaseRegs.push_back(Sum);
4055 F.canonicalize(*L);
4056 (void)InsertFormula(LU, LUIdx, F);
4057 };
4058
4059 // If we collected at least two registers, generate a formula combining them.
4060 if (Ops.size() > 1) {
4061 SmallVector<const SCEV *, 4> OpsCopy(Ops); // Don't let SE modify Ops.
4062 GenerateFormula(SE.getAddExpr(OpsCopy));
4063 }
4064
4065 // If we have an unfolded offset, generate a formula combining it with the
4066 // registers collected.
4067 if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
4068 assert(CombinedIntegerType && "Missing a type for the unfolded offset");
4069 Ops.push_back(SE.getConstant(CombinedIntegerType,
4070 NewBase.UnfoldedOffset.getFixedValue(), true));
4071 NewBase.UnfoldedOffset = Immediate::getFixed(0);
4072 GenerateFormula(SE.getAddExpr(Ops));
4073 }
4074}
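// For example (register names are illustrative): from a formula
//   reg(%a) + reg(%b) + reg({0,+,8}<%L>)
// where %a and %b are loop-invariant and dominate the header, this generates
//   reg(%a + %b) + reg({0,+,8}<%L>)
// so the two invariant values can be pre-added outside the loop and held in a
// single register.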
4075
4076/// Helper function for LSRInstance::GenerateSymbolicOffsets.
4077void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
4078 const Formula &Base, size_t Idx,
4079 bool IsScaledReg) {
4080 const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
4081 GlobalValue *GV = ExtractSymbol(G, SE);
4082 if (G->isZero() || !GV)
4083 return;
4084 Formula F = Base;
4085 F.BaseGV = GV;
4086 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
4087 return;
4088 if (IsScaledReg)
4089 F.ScaledReg = G;
4090 else
4091 F.BaseRegs[Idx] = G;
4092 (void)InsertFormula(LU, LUIdx, F);
4093}
4094
4095/// Generate reuse formulae using symbolic offsets.
4096void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
4097 Formula Base) {
4098 // We can't add a symbolic offset if the address already contains one.
4099 if (Base.BaseGV) return;
4100
4101 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
4102 GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
4103 if (Base.Scale == 1)
4104 GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
4105 /* IsScaledReg */ true);
4106}
4107
4108/// Helper function for LSRInstance::GenerateConstantOffsets.
4109void LSRInstance::GenerateConstantOffsetsImpl(
4110 LSRUse &LU, unsigned LUIdx, const Formula &Base,
4111 const SmallVectorImpl<Immediate> &Worklist, size_t Idx, bool IsScaledReg) {
4112
4113 auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {
4114 Formula F = Base;
4115 if (!Base.BaseOffset.isCompatibleImmediate(Offset))
4116 return;
4117 F.BaseOffset = Base.BaseOffset.subUnsigned(Offset);
4118
4119 if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
4120 // Add the offset to the base register.
4121 const SCEV *NewOffset = Offset.getSCEV(SE, G->getType());
4122 const SCEV *NewG = SE.getAddExpr(NewOffset, G);
4123 // If it cancelled out, drop the base register, otherwise update it.
4124 if (NewG->isZero()) {
4125 if (IsScaledReg) {
4126 F.Scale = 0;
4127 F.ScaledReg = nullptr;
4128 } else
4129 F.deleteBaseReg(F.BaseRegs[Idx]);
4130 F.canonicalize(*L);
4131 } else if (IsScaledReg)
4132 F.ScaledReg = NewG;
4133 else
4134 F.BaseRegs[Idx] = NewG;
4135
4136 (void)InsertFormula(LU, LUIdx, F);
4137 }
4138 };
4139
4140 const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
4141
4142 // With constant offsets and constant steps, we can generate pre-inc
4143 // accesses by having the offset equal the step. So, for access #0 with a
4144 // step of 8, we generate a G - 8 base which would require the first access
4145 // to be ((G - 8) + 8),+,8. The pre-indexed access then updates the pointer
 4146   // for itself and hopefully becomes the base for other accesses. This means
 4147   // that a single pre-indexed access can be generated to become the new
4148 // base pointer for each iteration of the loop, resulting in no extra add/sub
4149 // instructions for pointer updating.
4150 if (AMK == TTI::AMK_PreIndexed && LU.Kind == LSRUse::Address) {
4151 const APInt *StepInt;
4152 if (match(G, m_scev_AffineAddRec(m_SCEV(), m_scev_APInt(StepInt)))) {
4153 int64_t Step = StepInt->isNegative() ? StepInt->getSExtValue()
4154 : StepInt->getZExtValue();
4155
4156 for (Immediate Offset : Worklist) {
4157 if (Offset.isFixed()) {
4158 Offset = Immediate::getFixed(Offset.getFixedValue() - Step);
4159 GenerateOffset(G, Offset);
4160 }
4161 }
4162 }
4163 }
4164 for (Immediate Offset : Worklist)
4165 GenerateOffset(G, Offset);
4166
4167 Immediate Imm = ExtractImmediate(G, SE);
4168 if (G->isZero() || Imm.isZero() ||
4169 !Base.BaseOffset.isCompatibleImmediate(Imm))
4170 return;
4171 Formula F = Base;
4172 F.BaseOffset = F.BaseOffset.addUnsigned(Imm);
4173 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
4174 return;
4175 if (IsScaledReg) {
4176 F.ScaledReg = G;
4177 } else {
4178 F.BaseRegs[Idx] = G;
4179 // We may generate non canonical Formula if G is a recurrent expr reg
4180 // related with current loop while F.ScaledReg is not.
4181 F.canonicalize(*L);
4182 }
4183 (void)InsertFormula(LU, LUIdx, F);
4184}
4185
 4186 /// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
4187void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
4188 Formula Base) {
4189 // TODO: For now, just add the min and max offset, because it usually isn't
 4190   // worthwhile looking at everything in between.
 4191   SmallVector<Immediate, 2> Worklist;
 4192   Worklist.push_back(LU.MinOffset);
4193 if (LU.MaxOffset != LU.MinOffset)
4194 Worklist.push_back(LU.MaxOffset);
4195
4196 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
4197 GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
4198 if (Base.Scale == 1)
4199 GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
4200 /* IsScaledReg */ true);
4201}
4202
4203/// For ICmpZero, check to see if we can scale up the comparison. For example, x
4204/// == y -> x*c == y*c.
4205void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
4206 Formula Base) {
4207 if (LU.Kind != LSRUse::ICmpZero) return;
4208
4209 // Determine the integer type for the base formula.
4210 Type *IntTy = Base.getType();
4211 if (!IntTy) return;
4212 if (SE.getTypeSizeInBits(IntTy) > 64) return;
4213
4214 // Don't do this if there is more than one offset.
4215 if (LU.MinOffset != LU.MaxOffset) return;
4216
 4217   // Check if the transformation is valid. It is illegal to multiply a pointer.
4218 if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
4219 return;
4220 for (const SCEV *BaseReg : Base.BaseRegs)
4221 if (BaseReg->getType()->isPointerTy())
4222 return;
4223 assert(!Base.BaseGV && "ICmpZero use is not legal!");
4224
4225 // Check each interesting stride.
4226 for (int64_t Factor : Factors) {
4227 // Check that Factor can be represented by IntTy
4228 if (!ConstantInt::isValueValidForType(IntTy, Factor))
4229 continue;
4230 // Check that the multiplication doesn't overflow.
4231 if (Base.BaseOffset.isMin() && Factor == -1)
4232 continue;
4233 // Not supporting scalable immediates.
4234 if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())
4235 continue;
4236 Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(Factor);
4237 assert(Factor != 0 && "Zero factor not expected!");
4238 if (NewBaseOffset.getFixedValue() / Factor !=
4239 Base.BaseOffset.getFixedValue())
4240 continue;
4241 // If the offset will be truncated at this use, check that it is in bounds.
4242 if (!IntTy->isPointerTy() &&
4243 !ConstantInt::isValueValidForType(IntTy, NewBaseOffset.getFixedValue()))
4244 continue;
4245
4246 // Check that multiplying with the use offset doesn't overflow.
4247 Immediate Offset = LU.MinOffset;
4248 if (Offset.isMin() && Factor == -1)
4249 continue;
4250 Offset = Offset.mulUnsigned(Factor);
4251 if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
4252 continue;
4253 // If the offset will be truncated at this use, check that it is in bounds.
4254 if (!IntTy->isPointerTy() &&
4255 !ConstantInt::isValueValidForType(IntTy, Offset.getFixedValue()))
4256 continue;
4257
4258 Formula F = Base;
4259 F.BaseOffset = NewBaseOffset;
4260
4261 // Check that this scale is legal.
4262 if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
4263 continue;
4264
4265 // Compensate for the use having MinOffset built into it.
4266 F.BaseOffset = F.BaseOffset.addUnsigned(Offset).subUnsigned(LU.MinOffset);
4267
4268 const SCEV *FactorS = SE.getConstant(IntTy, Factor);
4269
4270 // Check that multiplying with each base register doesn't overflow.
4271 for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
4272 F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
4273 if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
4274 goto next;
4275 }
4276
4277 // Check that multiplying with the scaled register doesn't overflow.
4278 if (F.ScaledReg) {
4279 F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
4280 if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
4281 continue;
4282 }
4283
4284 // Check that multiplying with the unfolded offset doesn't overflow.
4285 if (F.UnfoldedOffset.isNonZero()) {
4286 if (F.UnfoldedOffset.isMin() && Factor == -1)
4287 continue;
4288 F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(Factor);
4289 if (F.UnfoldedOffset.getFixedValue() / Factor !=
4290 Base.UnfoldedOffset.getFixedValue())
4291 continue;
4292 // If the offset will be truncated, check that it is in bounds.
 4293       if (!IntTy->isPointerTy() && !ConstantInt::isValueValidForType(
 4294                                        IntTy, F.UnfoldedOffset.getFixedValue()))
4295 continue;
4296 }
4297
4298 // If we make it here and it's legal, add it.
4299 (void)InsertFormula(LU, LUIdx, F);
4300 next:;
4301 }
4302}
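// For example (assuming a zero use offset): with Factor == 4, an ICmpZero
// formula reg({0,+,1}<%L>) + -16 can be scaled to reg({0,+,4}<%L>) + -64,
// letting the exit comparison share a stride-4 IV that other uses may already
// require.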
4303
4304/// Generate stride factor reuse formulae by making use of scaled-offset address
4305/// modes, for example.
4306void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
4307 // Determine the integer type for the base formula.
4308 Type *IntTy = Base.getType();
4309 if (!IntTy) return;
4310
4311 // If this Formula already has a scaled register, we can't add another one.
4312 // Try to unscale the formula to generate a better scale.
4313 if (Base.Scale != 0 && !Base.unscale())
4314 return;
4315
 4316   assert(Base.Scale == 0 && "unscale did not do its job!");
4317
4318 // Check each interesting stride.
4319 for (int64_t Factor : Factors) {
4320 Base.Scale = Factor;
4321 Base.HasBaseReg = Base.BaseRegs.size() > 1;
4322 // Check whether this scale is going to be legal.
4323 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4324 Base)) {
 4325       // As a special case, handle out-of-loop Basic users specially.
4326 // TODO: Reconsider this special case.
4327 if (LU.Kind == LSRUse::Basic &&
4328 isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
4329 LU.AccessTy, Base) &&
4330 LU.AllFixupsOutsideLoop)
4331 LU.Kind = LSRUse::Special;
4332 else
4333 continue;
4334 }
4335 // For an ICmpZero, negating a solitary base register won't lead to
4336 // new solutions.
4337 if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg &&
4338 Base.BaseOffset.isZero() && !Base.BaseGV)
4339 continue;
4340 // For each addrec base reg, if its loop is current loop, apply the scale.
4341 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
4342 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i]);
4343 if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
4344 const SCEV *FactorS = SE.getConstant(IntTy, Factor);
4345 if (FactorS->isZero())
4346 continue;
4347 // Divide out the factor, ignoring high bits, since we'll be
4348 // scaling the value back up in the end.
4349 if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true))
4350 if (!Quotient->isZero()) {
4351 // TODO: This could be optimized to avoid all the copying.
4352 Formula F = Base;
4353 F.ScaledReg = Quotient;
4354 F.deleteBaseReg(F.BaseRegs[i]);
4355 // The canonical representation of 1*reg is reg, which is already in
4356 // Base. In that case, do not try to insert the formula, it will be
4357 // rejected anyway.
4358 if (F.Scale == 1 && (F.BaseRegs.empty() ||
4359 (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
4360 continue;
4361 // If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate
4362 // non canonical Formula with ScaledReg's loop not being L.
4363 if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
4364 F.canonicalize(*L);
4365 (void)InsertFormula(LU, LUIdx, F);
4366 }
4367 }
4368 }
4369 }
4370}
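// For example, with Factor == 4 a formula reg({0,+,4}<%L>) can become
// 4*reg({0,+,1}<%L>), which maps onto a scaled-index addressing mode such as
// [base + reg*4] on targets that support one.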
4371
4372/// Extend/Truncate \p Expr to \p ToTy considering post-inc uses in \p Loops.
4373/// For all PostIncLoopSets in \p Loops, first de-normalize \p Expr, then
4374/// perform the extension/truncate and normalize again, as the normalized form
4375/// can result in folds that are not valid in the post-inc use contexts. The
4376/// expressions for all PostIncLoopSets must match, otherwise return nullptr.
4377static const SCEV *
 4378 getAnyExtendConsideringPostIncUses(ArrayRef<PostIncLoopSet> Loops,
 4379                                    const SCEV *Expr, Type *ToTy,
4380 ScalarEvolution &SE) {
4381 const SCEV *Result = nullptr;
4382 for (auto &L : Loops) {
4383 auto *DenormExpr = denormalizeForPostIncUse(Expr, L, SE);
4384 const SCEV *NewDenormExpr = SE.getAnyExtendExpr(DenormExpr, ToTy);
4385 const SCEV *New = normalizeForPostIncUse(NewDenormExpr, L, SE);
4386 if (!New || (Result && New != Result))
4387 return nullptr;
4388 Result = New;
4389 }
4390
4391 assert(Result && "failed to create expression");
4392 return Result;
4393}
4394
4395/// Generate reuse formulae from different IV types.
4396void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
4397 // Don't bother truncating symbolic values.
4398 if (Base.BaseGV) return;
4399
4400 // Determine the integer type for the base formula.
4401 Type *DstTy = Base.getType();
4402 if (!DstTy) return;
4403 if (DstTy->isPointerTy())
4404 return;
4405
4406 // It is invalid to extend a pointer type so exit early if ScaledReg or
4407 // any of the BaseRegs are pointers.
4408 if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
4409 return;
4410 if (any_of(Base.BaseRegs,
4411 [](const SCEV *S) { return S->getType()->isPointerTy(); }))
4412 return;
4413
 4414   SmallVector<PostIncLoopSet> Loops;
 4415   for (auto &LF : LU.Fixups)
4416 Loops.push_back(LF.PostIncLoops);
4417
4418 for (Type *SrcTy : Types) {
4419 if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
4420 Formula F = Base;
4421
4422 // Sometimes SCEV is able to prove zero during ext transform. It may
4423 // happen if SCEV did not do all possible transforms while creating the
4424 // initial node (maybe due to depth limitations), but it can do them while
4425 // taking ext.
4426 if (F.ScaledReg) {
4427 const SCEV *NewScaledReg =
4428 getAnyExtendConsideringPostIncUses(Loops, F.ScaledReg, SrcTy, SE);
4429 if (!NewScaledReg || NewScaledReg->isZero())
4430 continue;
4431 F.ScaledReg = NewScaledReg;
4432 }
4433 bool HasZeroBaseReg = false;
4434 for (const SCEV *&BaseReg : F.BaseRegs) {
4435 const SCEV *NewBaseReg =
4436 getAnyExtendConsideringPostIncUses(Loops, BaseReg, SrcTy, SE);
4437 if (!NewBaseReg || NewBaseReg->isZero()) {
4438 HasZeroBaseReg = true;
4439 break;
4440 }
4441 BaseReg = NewBaseReg;
4442 }
4443 if (HasZeroBaseReg)
4444 continue;
4445
4446 // TODO: This assumes we've done basic processing on all uses and
4447 // have an idea what the register usage is.
4448 if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4449 continue;
4450
4451 F.canonicalize(*L);
4452 (void)InsertFormula(LU, LUIdx, F);
4453 }
4454 }
4455}
4456
4457namespace {
4458
4459/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer
4460/// modifications so that the search phase doesn't have to worry about the data
4461/// structures moving underneath it.
4462struct WorkItem {
4463 size_t LUIdx;
4464 Immediate Imm;
4465 const SCEV *OrigReg;
4466
4467 WorkItem(size_t LI, Immediate I, const SCEV *R)
4468 : LUIdx(LI), Imm(I), OrigReg(R) {}
4469
4470 void print(raw_ostream &OS) const;
4471 void dump() const;
4472};
4473
4474} // end anonymous namespace
4475
4476#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4477void WorkItem::print(raw_ostream &OS) const {
4478 OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
4479 << " , add offset " << Imm;
4480}
4481
4482LLVM_DUMP_METHOD void WorkItem::dump() const {
4483 print(errs()); errs() << '\n';
4484}
4485#endif
4486
4487/// Look for registers which are a constant distance apart and try to form reuse
4488/// opportunities between them.
4489void LSRInstance::GenerateCrossUseConstantOffsets() {
4490 // Group the registers by their value without any added constant offset.
4491 using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
4492
 4493   DenseMap<const SCEV *, ImmMapTy> Map;
 4494   DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
 4495   SmallVector<const SCEV *, 8> Sequence;
 4496   for (const SCEV *Use : RegUses) {
4497 const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.
4498 Immediate Imm = ExtractImmediate(Reg, SE);
4499 auto Pair = Map.try_emplace(Reg);
4500 if (Pair.second)
4501 Sequence.push_back(Reg);
4502 Pair.first->second.insert(std::make_pair(Imm, Use));
4503 UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
4504 }
4505
4506 // Now examine each set of registers with the same base value. Build up
4507 // a list of work to do and do the work in a separate step so that we're
4508 // not adding formulae and register counts while we're searching.
4509 SmallVector<WorkItem, 32> WorkItems;
4510 SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
4511 UniqueItems;
4512 for (const SCEV *Reg : Sequence) {
4513 const ImmMapTy &Imms = Map.find(Reg)->second;
4514
4515 // It's not worthwhile looking for reuse if there's only one offset.
4516 if (Imms.size() == 1)
4517 continue;
4518
4519 LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
4520 for (const auto &Entry
4521 : Imms) dbgs()
4522 << ' ' << Entry.first;
4523 dbgs() << '\n');
4524
4525 // Examine each offset.
4526 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4527 J != JE; ++J) {
4528 const SCEV *OrigReg = J->second;
4529
4530 Immediate JImm = J->first;
4531 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4532
4533 if (!isa<SCEVConstant>(OrigReg) &&
4534 UsedByIndicesMap[Reg].count() == 1) {
4535 LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
4536 << '\n');
4537 continue;
4538 }
4539
 4540       // Conservatively examine offsets between this orig reg and a few
 4541       // selected other orig regs.
4542 Immediate First = Imms.begin()->first;
4543 Immediate Last = std::prev(Imms.end())->first;
4544 if (!First.isCompatibleImmediate(Last)) {
4545 LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
4546 << "\n");
4547 continue;
4548 }
4549 // Only scalable if both terms are scalable, or if one is scalable and
4550 // the other is 0.
4551 bool Scalable = First.isScalable() || Last.isScalable();
4552 int64_t FI = First.getKnownMinValue();
4553 int64_t LI = Last.getKnownMinValue();
4554 // Compute (First + Last) / 2 without overflow using the fact that
 4555       // First + Last = 2 * (First & Last) + (First ^ Last).
4556 int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
4557 // If the result is negative and FI is odd and LI even (or vice versa),
4558 // we rounded towards -inf. Add 1 in that case, to round towards 0.
4559 Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
4560 ImmMapTy::const_iterator OtherImms[] = {
4561 Imms.begin(), std::prev(Imms.end()),
4562 Imms.lower_bound(Immediate::get(Avg, Scalable))};
4563 for (const auto &M : OtherImms) {
4564 if (M == J || M == JE) continue;
4565 if (!JImm.isCompatibleImmediate(M->first))
4566 continue;
4567
4568 // Compute the difference between the two.
4569 Immediate Imm = JImm.subUnsigned(M->first);
4570 for (unsigned LUIdx : UsedByIndices.set_bits())
4571 // Make a memo of this use, offset, and register tuple.
4572 if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
4573 WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
4574 }
4575 }
4576 }
4577
4578 Map.clear();
4579 Sequence.clear();
4580 UsedByIndicesMap.clear();
4581 UniqueItems.clear();
4582
4583 // Now iterate through the worklist and add new formulae.
4584 for (const WorkItem &WI : WorkItems) {
4585 size_t LUIdx = WI.LUIdx;
4586 LSRUse &LU = Uses[LUIdx];
4587 Immediate Imm = WI.Imm;
4588 const SCEV *OrigReg = WI.OrigReg;
4589
4590 Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
4591 const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);
4592 unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
4593
4594 // TODO: Use a more targeted data structure.
4595 for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
4596 Formula F = LU.Formulae[L];
4597 // FIXME: The code for the scaled and unscaled registers looks
4598 // very similar but slightly different. Investigate if they
4599 // could be merged. That way, we would not have to unscale the
4600 // Formula.
4601 F.unscale();
4602 // Use the immediate in the scaled register.
4603 if (F.ScaledReg == OrigReg) {
4604 if (!F.BaseOffset.isCompatibleImmediate(Imm))
4605 continue;
4606 Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale));
4607 // Don't create 50 + reg(-50).
4608 const SCEV *S = Offset.getNegativeSCEV(SE, IntTy);
4609 if (F.referencesReg(S))
4610 continue;
4611 Formula NewF = F;
4612 NewF.BaseOffset = Offset;
4613 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4614 NewF))
4615 continue;
4616 NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
4617
4618 // If the new scale is a constant in a register, and adding the constant
4619 // value to the immediate would produce a value closer to zero than the
4620 // immediate itself, then the formula isn't worthwhile.
4621 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) {
4622 // FIXME: Do we need to do something for scalable immediates here?
4623 // A scalable SCEV won't be constant, but we might still have
4624 // something in the offset? Bail out for now to be safe.
4625 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4626 continue;
4627 if (C->getValue()->isNegative() !=
4628 (NewF.BaseOffset.isLessThanZero()) &&
4629 (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
4630 .ule(std::abs(NewF.BaseOffset.getFixedValue())))
4631 continue;
4632 }
4633
4634 // OK, looks good.
4635 NewF.canonicalize(*this->L);
4636 (void)InsertFormula(LU, LUIdx, NewF);
4637 } else {
4638 // Use the immediate in a base register.
4639 for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
4640 const SCEV *BaseReg = F.BaseRegs[N];
4641 if (BaseReg != OrigReg)
4642 continue;
4643 Formula NewF = F;
4644 if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
4645 !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
4646 !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
4647 continue;
4648 NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
4649 if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
4650 LU.Kind, LU.AccessTy, NewF)) {
4651 if (AMK == TTI::AMK_PostIndexed &&
4652 mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))
4653 continue;
4654 Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
4655 if (!isLegalAddImmediate(TTI, NewUnfoldedOffset))
4656 continue;
4657 NewF = F;
4658 NewF.UnfoldedOffset = NewUnfoldedOffset;
4659 }
4660 NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
4661
4662 // If the new formula has a constant in a register, and adding the
4663 // constant value to the immediate would produce a value closer to
4664 // zero than the immediate itself, then the formula isn't worthwhile.
4665 for (const SCEV *NewReg : NewF.BaseRegs)
4666 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg)) {
4667 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4668 goto skip_formula;
4669 if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())
4670 .abs()
4671 .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
4672 (C->getAPInt() + NewF.BaseOffset.getFixedValue())
4673 .countr_zero() >=
4674 (unsigned)llvm::countr_zero<uint64_t>(
4675 NewF.BaseOffset.getFixedValue()))
4676 goto skip_formula;
4677 }
4678
4679 // Ok, looks good.
4680 NewF.canonicalize(*this->L);
4681 (void)InsertFormula(LU, LUIdx, NewF);
4682 break;
4683 skip_formula:;
4684 }
4685 }
4686 }
4687 }
4688}
4689
4690/// Generate formulae for each use.
4691void
4692LSRInstance::GenerateAllReuseFormulae() {
4693 // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
4694 // queries are more precise.
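 // In other words, each generation phase runs across all uses before the next
 // phase starts, so later phases see the registers introduced by earlier
 // phases for every use, not just for the uses already visited.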
4695 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4696 LSRUse &LU = Uses[LUIdx];
4697 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4698 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4699 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4700 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
4701 }
4702 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4703 LSRUse &LU = Uses[LUIdx];
4704 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4705 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4706 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4707 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4708 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4709 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4710 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4711 GenerateScales(LU, LUIdx, LU.Formulae[i]);
4712 }
4713 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4714 LSRUse &LU = Uses[LUIdx];
4715 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4716 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4717 }
4718
4719 GenerateCrossUseConstantOffsets();
4720
4721 LLVM_DEBUG(dbgs() << "\n"
4722 "After generating reuse formulae:\n";
4723 print_uses(dbgs()));
4724}
4725
4726/// If there are multiple formulae with the same set of registers used
4727/// by other uses, pick the best one and delete the others.
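/// Only registers that are also used by other LSRUses form the key here, so
/// two formulae that differ only in registers private to this use compete for
/// the same key and the cheaper of them survives.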
4728void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4729 DenseSet<const SCEV *> VisitedRegs;
4730 SmallPtrSet<const SCEV *, 16> Regs;
4731 SmallPtrSet<const SCEV *, 16> LoserRegs;
4732#ifndef NDEBUG
4733 bool ChangedFormulae = false;
4734#endif
4735
4736 // Collect the best formula for each unique set of shared registers. This
4737 // is reset for each use.
4738 using BestFormulaeTy = DenseMap<SmallVector<const SCEV *, 4>, size_t>;
4739
4740 BestFormulaeTy BestFormulae;
4741
4742 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4743 LSRUse &LU = Uses[LUIdx];
4744 LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
4745 dbgs() << '\n');
4746
4747 bool Any = false;
4748 for (size_t FIdx = 0, NumForms = LU.Formulae.size();
4749 FIdx != NumForms; ++FIdx) {
4750 Formula &F = LU.Formulae[FIdx];
4751
4752 // Some formulas are instant losers. For example, they may depend on
4753 // nonexistent AddRecs from other loops. These need to be filtered
4754 // immediately, otherwise heuristics could choose them over others leading
4755 // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
4756 // avoids the need to recompute this information across formulae using the
4757 // same bad AddRec. Passing LoserRegs is also essential unless we remove
4758 // the corresponding bad register from the Regs set.
4759 Cost CostF(L, SE, TTI, AMK);
4760 Regs.clear();
4761 CostF.RateFormula(F, Regs, VisitedRegs, LU, HardwareLoopProfitable,
4762 &LoserRegs);
4763 if (CostF.isLoser()) {
4764 // During initial formula generation, undesirable formulae are generated
4765 // by uses within other loops that have some non-trivial address mode or
4766 // use the postinc form of the IV. LSR needs to provide these formulae
4767 // as the basis of rediscovering the desired formula that uses an AddRec
4768 // corresponding to the existing phi. Once all formulae have been
4769 // generated, these initial losers may be pruned.
4770 LLVM_DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
4771 dbgs() << "\n");
4772 }
4773 else {
4774 SmallVector<const SCEV *, 4> Key;
4775 for (const SCEV *Reg : F.BaseRegs) {
4776 if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
4777 Key.push_back(Reg);
4778 }
4779 if (F.ScaledReg &&
4780 RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
4781 Key.push_back(F.ScaledReg);
4782 // Unstable sort by host order ok, because this is only used for
4783 // uniquifying.
4784 llvm::sort(Key);
4785
4786 std::pair<BestFormulaeTy::const_iterator, bool> P =
4787 BestFormulae.insert(std::make_pair(Key, FIdx));
4788 if (P.second)
4789 continue;
4790
4791 Formula &Best = LU.Formulae[P.first->second];
4792
4793 Cost CostBest(L, SE, TTI, AMK);
4794 Regs.clear();
4795 CostBest.RateFormula(Best, Regs, VisitedRegs, LU,
4796 HardwareLoopProfitable);
4797 if (CostF.isLess(CostBest))
4798 std::swap(F, Best);
4799 LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
4800 dbgs() << "\n"
4801 " in favor of formula ";
4802 Best.print(dbgs()); dbgs() << '\n');
4803 }
4804#ifndef NDEBUG
4805 ChangedFormulae = true;
4806#endif
4807 LU.DeleteFormula(F);
4808 --FIdx;
4809 --NumForms;
4810 Any = true;
4811 }
4812
4813 // Now that we've filtered out some formulae, recompute the Regs set.
4814 if (Any)
4815 LU.RecomputeRegs(LUIdx, RegUses);
4816
4817 // Reset this to prepare for the next use.
4818 BestFormulae.clear();
4819 }
4820
4821 LLVM_DEBUG(if (ChangedFormulae) {
4822 dbgs() << "\n"
4823 "After filtering out undesirable candidates:\n";
4824 print_uses(dbgs());
4825 });
4826}
4827
4828/// Estimate the worst-case number of solutions the solver might have to
4829/// consider. It almost never considers this many solutions because it prunes the
4830/// search space, but the pruning isn't always sufficient.
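/// For example, uses with 3, 4, and 5 formulae give an estimate of
/// 3 * 4 * 5 = 60 candidate solutions; the running product is clamped as soon
/// as it reaches ComplexityLimit.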
4831size_t LSRInstance::EstimateSearchSpaceComplexity() const {
4832 size_t Power = 1;
4833 for (const LSRUse &LU : Uses) {
4834 size_t FSize = LU.Formulae.size();
4835 if (FSize >= ComplexityLimit) {
4836 Power = ComplexityLimit;
4837 break;
4838 }
4839 Power *= FSize;
4840 if (Power >= ComplexityLimit)
4841 break;
4842 }
4843 return Power;
4844}
4845
4846/// When one formula uses a superset of the registers of another formula, it
4847/// won't help reduce register pressure (though it may not necessarily hurt
4848/// register pressure); remove it to simplify the system.
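/// For example, a formula whose base registers are {reg(A), reg(7)} is
/// deleted when the use already has a formula over just {reg(A)}, since the
/// constant 7 can be carried in the immediate instead of a register.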
4849void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4850 if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4851 LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
4852
4853 LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
4854 "which use a superset of registers used by other "
4855 "formulae.\n");
4856
4857 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4858 LSRUse &LU = Uses[LUIdx];
4859 bool Any = false;
4860 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4861 Formula &F = LU.Formulae[i];
4862 if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())
4863 continue;
4864 // Look for a formula with a constant or GV in a register. If the use
4865 // also has a formula with that same value in an immediate field,
4866 // delete the one that uses a register.
4867 for (SmallVectorImpl<const SCEV *>::const_iterator
4868 I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
4869 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
4870 Formula NewF = F;
4871 //FIXME: Formulas should store bitwidth to do wrapping properly.
4872 // See PR41034.
4873 NewF.BaseOffset =
4874 Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
4875 (uint64_t)C->getValue()->getSExtValue());
4876 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4877 (I - F.BaseRegs.begin()));
4878 if (LU.HasFormulaWithSameRegs(NewF)) {
4879 LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs());
4880 dbgs() << '\n');
4881 LU.DeleteFormula(F);
4882 --i;
4883 --e;
4884 Any = true;
4885 break;
4886 }
4887 } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
4888 if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
4889 if (!F.BaseGV) {
4890 Formula NewF = F;
4891 NewF.BaseGV = GV;
4892 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4893 (I - F.BaseRegs.begin()));
4894 if (LU.HasFormulaWithSameRegs(NewF)) {
4895 LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs());
4896 dbgs() << '\n');
4897 LU.DeleteFormula(F);
4898 --i;
4899 --e;
4900 Any = true;
4901 break;
4902 }
4903 }
4904 }
4905 }
4906 }
4907 if (Any)
4908 LU.RecomputeRegs(LUIdx, RegUses);
4909 }
4910
4911 LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4912 }
4913}
4914
4915/// When there are many registers for expressions like A, A+1, A+2, etc.,
4916/// allocate a single register for them.
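/// For example, after unrolling, uses of A, A+4, and A+8 can be folded into a
/// single use of A, with 4 and 8 carried as per-fixup offsets.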
4917void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4918 if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4919 return;
4920
4921 LLVM_DEBUG(
4922 dbgs() << "The search space is too complex.\n"
4923 "Narrowing the search space by assuming that uses separated "
4924 "by a constant offset will use the same registers.\n");
4925
4926 // This is especially useful for unrolled loops.
4927
4928 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4929 LSRUse &LU = Uses[LUIdx];
4930 for (const Formula &F : LU.Formulae) {
4931 if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1))
4932 continue;
4933
4934 LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
4935 if (!LUThatHas)
4936 continue;
4937
4938 if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
4939 LU.Kind, LU.AccessTy))
4940 continue;
4941
4942 LLVM_DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
4943
4944 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4945
4946 // Transfer the fixups of LU to LUThatHas.
4947 for (LSRFixup &Fixup : LU.Fixups) {
4948 Fixup.Offset += F.BaseOffset;
4949 LUThatHas->pushFixup(Fixup);
4950 LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
4951 }
4952
4953 // Delete formulae from the new use which are no longer legal.
4954 bool Any = false;
4955 for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4956 Formula &F = LUThatHas->Formulae[i];
4957 if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4958 LUThatHas->Kind, LUThatHas->AccessTy, F)) {
4959 LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
4960 LUThatHas->DeleteFormula(F);
4961 --i;
4962 --e;
4963 Any = true;
4964 }
4965 }
4966
4967 if (Any)
4968 LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
4969
4970 // Delete the old use.
4971 DeleteUse(LU, LUIdx);
4972 --LUIdx;
4973 --NumUses;
4974 break;
4975 }
4976 }
4977
4978 LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4979}
4980
4981/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
4982/// we've done more filtering, as it may be able to find more formulae to
4983/// eliminate.
4984void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
4985 if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4986 LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
4987
4988 LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
4989 "undesirable dedicated registers.\n");
4990
4991 FilterOutUndesirableDedicatedRegisters();
4992
4993 LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4994 }
4995}
4996
4997/// If an LSRUse has multiple formulae with the same ScaledReg and Scale,
4998/// pick the best one and delete the others.
4999/// This narrowing heuristic keeps as many formulae with different
5000/// Scale and ScaledReg pairs as possible while narrowing the search space.
5001/// The benefit is that a better solution is more likely to be found in
5002/// a formulae set with more Scale and ScaledReg variations than in
5003/// a set where they are all the same. The winner-reg-picking
5004/// heuristic tends to keep formulae with the same Scale and
5005/// ScaledReg and filter out the others, and we want to avoid that if possible.
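/// For example, of several formulae sharing ScaledReg = {0,+,4} and Scale = 2,
/// only the one needing the fewest (or cheapest) additional registers is kept;
/// formulae with other (ScaledReg, Scale) pairs are left untouched.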
5006void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
5007 if (EstimateSearchSpaceComplexity() < ComplexityLimit)
5008 return;
5009
5010 LLVM_DEBUG(
5011 dbgs() << "The search space is too complex.\n"
5012 "Narrowing the search space by choosing the best Formula "
5013 "from the Formulae with the same Scale and ScaledReg.\n");
5014
5015 // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse.
5016 using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
5017
5018 BestFormulaeTy BestFormulae;
5019#ifndef NDEBUG
5020 bool ChangedFormulae = false;
5021#endif
5022 DenseSet<const SCEV *> VisitedRegs;
5023 SmallPtrSet<const SCEV *, 16> Regs;
5024
5025 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5026 LSRUse &LU = Uses[LUIdx];
5027 LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
5028 dbgs() << '\n');
5029
5030 // Return true if Formula FA is better than Formula FB.
5031 auto IsBetterThan = [&](Formula &FA, Formula &FB) {
5032 // First we will try to choose the Formula with fewer new registers.
5033 // For a register used by current Formula, the more the register is
5034 // shared among LSRUses, the less we increase the register number
5035 // counter of the formula.
5036 size_t FARegNum = 0;
5037 for (const SCEV *Reg : FA.BaseRegs) {
5038 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5039 FARegNum += (NumUses - UsedByIndices.count() + 1);
5040 }
5041 size_t FBRegNum = 0;
5042 for (const SCEV *Reg : FB.BaseRegs) {
5043 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5044 FBRegNum += (NumUses - UsedByIndices.count() + 1);
5045 }
5046 if (FARegNum != FBRegNum)
5047 return FARegNum < FBRegNum;
5048
5049 // If the new register numbers are the same, choose the Formula with
5050 // less Cost.
5051 Cost CostFA(L, SE, TTI, AMK);
5052 Cost CostFB(L, SE, TTI, AMK);
5053 Regs.clear();
5054 CostFA.RateFormula(FA, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5055 Regs.clear();
5056 CostFB.RateFormula(FB, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5057 return CostFA.isLess(CostFB);
5058 };
5059
5060 bool Any = false;
5061 for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5062 ++FIdx) {
5063 Formula &F = LU.Formulae[FIdx];
5064 if (!F.ScaledReg)
5065 continue;
5066 auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
5067 if (P.second)
5068 continue;
5069
5070 Formula &Best = LU.Formulae[P.first->second];
5071 if (IsBetterThan(F, Best))
5072 std::swap(F, Best);
5073 LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
5074 dbgs() << "\n"
5075 " in favor of formula ";
5076 Best.print(dbgs()); dbgs() << '\n');
5077#ifndef NDEBUG
5078 ChangedFormulae = true;
5079#endif
5080 LU.DeleteFormula(F);
5081 --FIdx;
5082 --NumForms;
5083 Any = true;
5084 }
5085 if (Any)
5086 LU.RecomputeRegs(LUIdx, RegUses);
5087
5088 // Reset this to prepare for the next use.
5089 BestFormulae.clear();
5090 }
5091
5092 LLVM_DEBUG(if (ChangedFormulae) {
5093 dbgs() << "\n"
5094 "After filtering out undesirable candidates:\n";
5095 print_uses(dbgs());
5096 });
5097}
5098
5099/// If we are over the complexity limit, filter out any post-inc preferring
5100/// variables to keep only post-inc values.
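/// Concretely, for each address use where the target supports post-indexed
/// accesses, only the formulae with the minimum register count are kept.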
5101void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
5102 if (AMK != TTI::AMK_PostIndexed)
5103 return;
5104 if (EstimateSearchSpaceComplexity() < ComplexityLimit)
5105 return;
5106
5107 LLVM_DEBUG(dbgs() << "The search space is too complex.\n"
5108 "Narrowing the search space by choosing the lowest "
5109 "register Formula for PostInc Uses.\n");
5110
5111 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5112 LSRUse &LU = Uses[LUIdx];
5113
5114 if (LU.Kind != LSRUse::Address)
5115 continue;
5116 if (!TTI.isIndexedLoadLegal(TTI.MIM_PostInc, LU.AccessTy.getType()) &&
5117 !TTI.isIndexedStoreLegal(TTI.MIM_PostInc, LU.AccessTy.getType()))
5118 continue;
5119
5120 size_t MinRegs = std::numeric_limits<size_t>::max();
5121 for (const Formula &F : LU.Formulae)
5122 MinRegs = std::min(F.getNumRegs(), MinRegs);
5123
5124 bool Any = false;
5125 for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5126 ++FIdx) {
5127 Formula &F = LU.Formulae[FIdx];
5128 if (F.getNumRegs() > MinRegs) {
5129 LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
5130 dbgs() << "\n");
5131 LU.DeleteFormula(F);
5132 --FIdx;
5133 --NumForms;
5134 Any = true;
5135 }
5136 }
5137 if (Any)
5138 LU.RecomputeRegs(LUIdx, RegUses);
5139
5140 if (EstimateSearchSpaceComplexity() < ComplexityLimit)
5141 break;
5142 }
5143
5144 LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
5145}
5146
5147/// This function deletes formulas with a high expected number of registers.
5148/// Assuming we don't know the value of each formula (all inefficient ones
5149/// have already been deleted), compute the probability of not selecting each
5150/// register.
5151/// For example,
5152/// Use1:
5153/// reg(a) + reg({0,+,1})
5154/// reg(a) + reg({-1,+,1}) + 1
5155/// reg({a,+,1})
5156/// Use2:
5157/// reg(b) + reg({0,+,1})
5158/// reg(b) + reg({-1,+,1}) + 1
5159/// reg({b,+,1})
5160/// Use3:
5161/// reg(c) + reg(b) + reg({0,+,1})
5162/// reg(c) + reg({b,+,1})
5163///
5164/// Probability of not selecting
5165/// Use1 Use2 Use3
5166/// reg(a) (1/3) * 1 * 1
5167/// reg(b) 1 * (1/3) * (1/2)
5168/// reg({0,+,1}) (2/3) * (2/3) * (1/2)
5169/// reg({-1,+,1}) (2/3) * (2/3) * 1
5170/// reg({a,+,1}) (2/3) * 1 * 1
5171/// reg({b,+,1}) 1 * (2/3) * (2/3)
5172/// reg(c) 1 * 1 * 0
5173///
5174/// Now compute the mathematical expectation of the register count for each
5175/// formula. Note that for each use we exclude the probability of not selecting
5176/// for that use. For example, for Use1 the probability for reg(a) would be just
5177/// 1 * 1 (excluding the probability 1/3 of not selecting for Use1).
5178/// Use1:
5179/// reg(a) + reg({0,+,1}) 1 + 1/3 -- to be deleted
5180/// reg(a) + reg({-1,+,1}) + 1 1 + 4/9 -- to be deleted
5181/// reg({a,+,1}) 1
5182/// Use2:
5183/// reg(b) + reg({0,+,1}) 1/2 + 1/3 -- to be deleted
5184/// reg(b) + reg({-1,+,1}) + 1 1/2 + 2/3 -- to be deleted
5185/// reg({b,+,1}) 2/3
5186/// Use3:
5187/// reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
5188/// reg(c) + reg({b,+,1}) 1 + 2/3
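/// Here a register's per-use probability of not being selected is the fraction
/// of that use's formulae which do not reference it (see
/// LSRUse::getNotSelectedProbability).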
5189void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
5190 if (EstimateSearchSpaceComplexity() < ComplexityLimit)
5191 return;
5192 // Ok, we have too many formulae on our hands to conveniently handle.
5193 // Use a rough heuristic to thin out the list.
5194
5195 // Set of Regs which will be 100% used in the final solution.
5196 // Used in each formula of a solution (in example above this is reg(c)).
5197 // We can skip them in calculations.
5198 SmallPtrSet<const SCEV *, 4> UniqRegs;
5199 LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
5200
5201 // Map each register to probability of not selecting
5202 DenseMap <const SCEV *, float> RegNumMap;
5203 for (const SCEV *Reg : RegUses) {
5204 if (UniqRegs.count(Reg))
5205 continue;
5206 float PNotSel = 1;
5207 for (const LSRUse &LU : Uses) {
5208 if (!LU.Regs.count(Reg))
5209 continue;
5210 float P = LU.getNotSelectedProbability(Reg);
5211 if (P != 0.0)
5212 PNotSel *= P;
5213 else
5214 UniqRegs.insert(Reg);
5215 }
5216 RegNumMap.insert(std::make_pair(Reg, PNotSel));
5217 }
5218
5219 LLVM_DEBUG(
5220 dbgs() << "Narrowing the search space by deleting costly formulas\n");
5221
5222 // Delete formulas whose expected register count is high.
5223 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5224 LSRUse &LU = Uses[LUIdx];
5225 // If nothing to delete - continue.
5226 if (LU.Formulae.size() < 2)
5227 continue;
5228 // This is a temporary solution to test performance. Float should be
5229 // replaced with a rounding-independent type (based on integers) to avoid
5230 // different results for different target builds.
5231 float FMinRegNum = LU.Formulae[0].getNumRegs();
5232 float FMinARegNum = LU.Formulae[0].getNumRegs();
5233 size_t MinIdx = 0;
5234 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5235 Formula &F = LU.Formulae[i];
5236 float FRegNum = 0;
5237 float FARegNum = 0;
5238 for (const SCEV *BaseReg : F.BaseRegs) {
5239 if (UniqRegs.count(BaseReg))
5240 continue;
5241 FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
5242 if (isa<SCEVAddRecExpr>(BaseReg))
5243 FARegNum +=
5244 RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
5245 }
5246 if (const SCEV *ScaledReg = F.ScaledReg) {
5247 if (!UniqRegs.count(ScaledReg)) {
5248 FRegNum +=
5249 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5250 if (isa<SCEVAddRecExpr>(ScaledReg))
5251 FARegNum +=
5252 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5253 }
5254 }
5255 if (FMinRegNum > FRegNum ||
5256 (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
5257 FMinRegNum = FRegNum;
5258 FMinARegNum = FARegNum;
5259 MinIdx = i;
5260 }
5261 }
5262 LLVM_DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs());
5263 dbgs() << " with min reg num " << FMinRegNum << '\n');
5264 if (MinIdx != 0)
5265 std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
5266 while (LU.Formulae.size() != 1) {
5267 LLVM_DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs());
5268 dbgs() << '\n');
5269 LU.Formulae.pop_back();
5270 }
5271 LU.RecomputeRegs(LUIdx, RegUses);
5272 assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
5273 Formula &F = LU.Formulae[0];
5274 LLVM_DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << '\n');
5275 // When we choose the formula, the regs become unique.
5276 UniqRegs.insert_range(F.BaseRegs);
5277 if (F.ScaledReg)
5278 UniqRegs.insert(F.ScaledReg);
5279 }
5280 LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
5281}
5282
5283// Check if Best and Reg are SCEVs separated by a constant amount C, and if so,
5284// whether the addressing offset +C would be legal where the negative offset -C
5285// is not.
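// For example, with Best = {x+4,+,1} and Reg = {x,+,1} the difference is 4;
// if the target can fold a +4 offset but not a -4 offset into its addressing
// mode, Reg is the simpler base to standardize on.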
5286static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI,
5287 ScalarEvolution &SE, const SCEV *Best,
5288 const SCEV *Reg,
5289 MemAccessTy AccessType) {
5290 if (Best->getType() != Reg->getType() ||
5291 (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) &&
5292 cast<SCEVAddRecExpr>(Best)->getLoop() !=
5293 cast<SCEVAddRecExpr>(Reg)->getLoop()))
5294 return false;
5295 std::optional<APInt> Diff = SE.computeConstantDifference(Best, Reg);
5296 if (!Diff)
5297 return false;
5298
5299 return TTI.isLegalAddressingMode(
5300 AccessType.MemTy, /*BaseGV=*/nullptr,
5301 /*BaseOffset=*/Diff->getSExtValue(),
5302 /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace) &&
5303 !TTI.isLegalAddressingMode(
5304 AccessType.MemTy, /*BaseGV=*/nullptr,
5305 /*BaseOffset=*/-Diff->getSExtValue(),
5306 /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace);
5307}
5308
5309/// Pick a register which seems likely to be profitable, and then in any use
5310/// which has any reference to that register, delete all formulae which do not
5311/// reference that register.
5312void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5313 // With all other options exhausted, loop until the system is simple
5314 // enough to handle.
5315 SmallPtrSet<const SCEV *, 4> Taken;
5316 while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
5317 // Ok, we have too many formulae on our hands to conveniently handle.
5318 // Use a rough heuristic to thin out the list.
5319 LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
5320
5321 // Pick the register which is used by the most LSRUses, which is likely
5322 // to be a good reuse register candidate.
5323 const SCEV *Best = nullptr;
5324 unsigned BestNum = 0;
5325 for (const SCEV *Reg : RegUses) {
5326 if (Taken.count(Reg))
5327 continue;
5328 if (!Best) {
5329 Best = Reg;
5330 BestNum = RegUses.getUsedByIndices(Reg).count();
5331 } else {
5332 unsigned Count = RegUses.getUsedByIndices(Reg).count();
5333 if (Count > BestNum) {
5334 Best = Reg;
5335 BestNum = Count;
5336 }
5337
5338 // If the scores are the same, but the Reg is simpler for the target
5339 // (for example {x,+,1} as opposed to {x+C,+,1}, where the target can
5340 // handle +C but not -C), opt for the simpler formula.
5341 if (Count == BestNum) {
5342 int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
5343 if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&
5344 IsSimplerBaseSCEVForTarget(TTI, SE, Best, Reg,
5345 Uses[LUIdx].AccessTy)) {
5346 Best = Reg;
5347 BestNum = Count;
5348 }
5349 }
5350 }
5351 }
5352 assert(Best && "Failed to find best LSRUse candidate");
5353
5354 LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
5355 << " will yield profitable reuse.\n");
5356 Taken.insert(Best);
5357
5358 // In any use with formulae which references this register, delete formulae
5359 // which don't reference it.
5360 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5361 LSRUse &LU = Uses[LUIdx];
5362 if (!LU.Regs.count(Best)) continue;
5363
5364 bool Any = false;
5365 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5366 Formula &F = LU.Formulae[i];
5367 if (!F.referencesReg(Best)) {
5368 LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
5369 LU.DeleteFormula(F);
5370 --e;
5371 --i;
5372 Any = true;
5373 assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
5374 continue;
5375 }
5376 }
5377
5378 if (Any)
5379 LU.RecomputeRegs(LUIdx, RegUses);
5380 }
5381
5382 LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
5383 }
5384}
5385
5386/// If there are an extraordinary number of formulae to choose from, use some
5387/// rough heuristics to prune down the number of formulae. This keeps the main
5388/// solver from taking an extraordinary amount of time in some worst-case
5389/// scenarios.
5390void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
5391 NarrowSearchSpaceByDetectingSupersets();
5392 NarrowSearchSpaceByCollapsingUnrolledCode();
5393 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
5394 if (FilterSameScaledReg)
5395 NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5396 NarrowSearchSpaceByFilterPostInc();
5397 if (LSRExpNarrow)
5398 NarrowSearchSpaceByDeletingCostlyFormulas();
5399 else
5400 NarrowSearchSpaceByPickingWinnerRegs();
5401}
5402
5403/// This is the recursive solver.
5404void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
5405 Cost &SolutionCost,
5406 SmallVectorImpl<const Formula *> &Workspace,
5407 const Cost &CurCost,
5408 const SmallPtrSet<const SCEV *, 16> &CurRegs,
5409 DenseSet<const SCEV *> &VisitedRegs) const {
5410 // Some ideas:
5411 // - prune more:
5412 // - use more aggressive filtering
5413 // - sort the formula so that the most profitable solutions are found first
5414 // - sort the uses too
5415 // - search faster:
5416 // - don't compute a cost, and then compare. compare while computing a cost
5417 // and bail early.
5418 // - track register sets with SmallBitVector
5419
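 // Each level of the recursion commits a formula for exactly one use:
 // Workspace holds the formulae chosen so far, so its size indexes the use
 // being solved at this level.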
5420 const LSRUse &LU = Uses[Workspace.size()];
5421
5422 // If this use references any register that's already a part of the
5423 // in-progress solution, consider it a requirement that a formula must
5424 // reference that register in order to be considered. This prunes out
5425 // unprofitable searching.
5426 SmallSetVector<const SCEV *, 4> ReqRegs;
5427 for (const SCEV *S : CurRegs)
5428 if (LU.Regs.count(S))
5429 ReqRegs.insert(S);
5430
5431 SmallPtrSet<const SCEV *, 16> NewRegs;
5432 Cost NewCost(L, SE, TTI, AMK);
5433 for (const Formula &F : LU.Formulae) {
5434 // Ignore formulae which may not be ideal in terms of register reuse of
5435 // ReqRegs. The formula should use all required registers before
5436 // introducing new ones.
5437 // This can sometimes (notably when trying to favour postinc) lead to
5438 // sub-optimal decisions. In those cases it is best left to the cost modelling
5439 // to get right.
5440 if (AMK != TTI::AMK_PostIndexed || LU.Kind != LSRUse::Address) {
5441 int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
5442 for (const SCEV *Reg : ReqRegs) {
5443 if ((F.ScaledReg && F.ScaledReg == Reg) ||
5444 is_contained(F.BaseRegs, Reg)) {
5445 --NumReqRegsToFind;
5446 if (NumReqRegsToFind == 0)
5447 break;
5448 }
5449 }
5450 if (NumReqRegsToFind != 0) {
5451 // If none of the formulae satisfied the required registers, then we could
5452 // clear ReqRegs and try again. Currently, we simply give up in this case.
5453 continue;
5454 }
5455 }
5456
5457 // Evaluate the cost of the current formula. If it's already worse than
5458 // the current best, prune the search at that point.
5459 NewCost = CurCost;
5460 NewRegs = CurRegs;
5461 NewCost.RateFormula(F, NewRegs, VisitedRegs, LU, HardwareLoopProfitable);
5462 if (NewCost.isLess(SolutionCost)) {
5463 Workspace.push_back(&F);
5464 if (Workspace.size() != Uses.size()) {
5465 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
5466 NewRegs, VisitedRegs);
5467 if (F.getNumRegs() == 1 && Workspace.size() == 1)
5468 VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
5469 } else {
5470 LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
5471 dbgs() << ".\nRegs:\n";
5472 for (const SCEV *S : NewRegs) dbgs()
5473 << "- " << *S << "\n";
5474 dbgs() << '\n');
5475
5476 SolutionCost = NewCost;
5477 Solution = Workspace;
5478 }
5479 Workspace.pop_back();
5480 }
5481 }
5482}
5483
5484/// Choose one formula from each use. Return the results in the given Solution
5485/// vector.
5486void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
5487 SmallVector<const Formula *, 8> Workspace;
5488 Cost SolutionCost(L, SE, TTI, AMK);
5489 SolutionCost.Lose();
5490 Cost CurCost(L, SE, TTI, AMK);
5491 SmallPtrSet<const SCEV *, 16> CurRegs;
5492 DenseSet<const SCEV *> VisitedRegs;
5493 Workspace.reserve(Uses.size());
5494
5495 // SolveRecurse does all the work.
5496 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
5497 CurRegs, VisitedRegs);
5498 if (Solution.empty()) {
5499 LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
5500 return;
5501 }
5502
5503 // Ok, we've now made all our decisions.
5504 LLVM_DEBUG(dbgs() << "\n"
5505 "The chosen solution requires ";
5506 SolutionCost.print(dbgs()); dbgs() << ":\n";
5507 for (size_t i = 0, e = Uses.size(); i != e; ++i) {
5508 dbgs() << " ";
5509 Uses[i].print(dbgs());
5510 dbgs() << "\n"
5511 " ";
5512 Solution[i]->print(dbgs());
5513 dbgs() << '\n';
5514 });
5515
5516 assert(Solution.size() == Uses.size() && "Malformed solution!");
5517
5518 const bool EnableDropUnprofitableSolution = [&] {
5519 switch (AllowDropSolutionIfLessProfitable) {
5520 case cl::BOU_TRUE:
5521 return true;
5522 case cl::BOU_FALSE:
5523 return false;
5524 case cl::BOU_UNSET:
5525 return TTI.shouldDropLSRSolutionIfLessProfitable();
5526 }
5527 llvm_unreachable("Unhandled cl::boolOrDefault enum");
5528 }();
5529
5530 if (BaselineCost.isLess(SolutionCost)) {
5531 if (!EnableDropUnprofitableSolution)
5532 LLVM_DEBUG(
5533 dbgs() << "Baseline is more profitable than chosen solution, "
5534 "add option 'lsr-drop-solution' to drop LSR solution.\n");
5535 else {
5536 LLVM_DEBUG(dbgs() << "Baseline is more profitable than chosen "
5537 "solution, dropping LSR solution.\n";);
5538 Solution.clear();
5539 }
5540 }
5541}
5542
5543/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as far as
5544/// we can go while still being dominated by the input positions. This helps
5545/// canonicalize the insert position, which encourages sharing.
5546BasicBlock::iterator
5547LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
5548 const SmallVectorImpl<Instruction *> &Inputs)
5549 const {
5550 Instruction *Tentative = &*IP;
5551 while (true) {
5552 bool AllDominate = true;
5553 Instruction *BetterPos = nullptr;
5554 // Don't bother attempting to insert before a catchswitch; its basic block
5555 // cannot have other non-PHI instructions.
5556 if (isa<CatchSwitchInst>(Tentative))
5557 return IP;
5558
5559 for (Instruction *Inst : Inputs) {
5560 if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
5561 AllDominate = false;
5562 break;
5563 }
5564 // Attempt to find an insert position in the middle of the block,
5565 // instead of at the end, so that it can be used for other expansions.
5566 if (Tentative->getParent() == Inst->getParent() &&
5567 (!BetterPos || !DT.dominates(Inst, BetterPos)))
5568 BetterPos = &*std::next(BasicBlock::iterator(Inst));
5569 }
5570 if (!AllDominate)
5571 break;
5572 if (BetterPos)
5573 IP = BetterPos->getIterator();
5574 else
5575 IP = Tentative->getIterator();
5576
5577 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
5578 unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
5579
5580 BasicBlock *IDom;
5581 for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
5582 if (!Rung) return IP;
5583 Rung = Rung->getIDom();
5584 if (!Rung) return IP;
5585 IDom = Rung->getBlock();
5586
5587 // Don't climb into a loop though.
5588 const Loop *IDomLoop = LI.getLoopFor(IDom);
5589 unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
5590 if (IDomDepth <= IPLoopDepth &&
5591 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
5592 break;
5593 }
5594
5595 Tentative = IDom->getTerminator();
5596 }
5597
5598 return IP;
5599}
5600
5601/// Determine an input position which will be dominated by the operands and
5602/// which will dominate the result.
5603BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(
5604 BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const {
5605 // Collect some instructions which must be dominated by the
5606 // expanding replacement. These must be dominated by any operands that
5607 // will be required in the expansion.
5608 SmallVector<Instruction *, 4> Inputs;
5609 if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
5610 Inputs.push_back(I);
5611 if (LU.Kind == LSRUse::ICmpZero)
5612 if (Instruction *I =
5613 dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
5614 Inputs.push_back(I);
5615 if (LF.PostIncLoops.count(L)) {
5616 if (LF.isUseFullyOutsideLoop(L))
5617 Inputs.push_back(L->getLoopLatch()->getTerminator());
5618 else
5619 Inputs.push_back(IVIncInsertPos);
5620 }
5621 // The expansion must also be dominated by the increment positions of any
5622 // loops for which it is using post-inc mode.
5623 for (const Loop *PIL : LF.PostIncLoops) {
5624 if (PIL == L) continue;
5625
5626 // Be dominated by the loop exit.
5627 SmallVector<BasicBlock *, 4> ExitingBlocks;
5628 PIL->getExitingBlocks(ExitingBlocks);
5629 if (!ExitingBlocks.empty()) {
5630 BasicBlock *BB = ExitingBlocks[0];
5631 for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
5632 BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
5633 Inputs.push_back(BB->getTerminator());
5634 }
5635 }
5636
5637 assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad() &&
5638 "Insertion point must be a normal instruction");
5639
5640 // Then, climb up the immediate dominator tree as far as we can go while
5641 // still being dominated by the input positions.
5642 BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
5643
5644 // Don't insert instructions before PHI nodes.
5645 while (isa<PHINode>(IP)) ++IP;
5646
5647 // Ignore landingpad instructions.
5648 while (IP->isEHPad()) ++IP;
5649
5650 // Set IP below instructions recently inserted by SCEVExpander. This keeps the
5651 // IP consistent across expansions and allows the previously inserted
5652 // instructions to be reused by subsequent expansion.
5653 while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
5654 ++IP;
5655
5656 return IP;
5657}
5658
5659/// Emit instructions for the leading candidate expression for this LSRUse (this
5660/// is called "expanding").
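/// The formula is materialized via SCEVExpander as the sum of its base
/// registers, scaled register, base global value, and immediate offsets.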
5661Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
5662 const Formula &F, BasicBlock::iterator IP,
5663 SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
5664 if (LU.RigidFormula)
5665 return LF.OperandValToReplace;
5666
5667 // Determine an input position which will be dominated by the operands and
5668 // which will dominate the result.
5669 IP = AdjustInsertPositionForExpand(IP, LF, LU);
5670 Rewriter.setInsertPoint(&*IP);
5671
5672 // Inform the Rewriter if we have a post-increment use, so that it can
5673 // perform an advantageous expansion.
5674 Rewriter.setPostInc(LF.PostIncLoops);
5675
5676 // This is the type that the user actually needs.
5677 Type *OpTy = LF.OperandValToReplace->getType();
5678 // This will be the type that we'll initially expand to.
5679 Type *Ty = F.getType();
5680 if (!Ty)
5681 // No type known; just expand directly to the ultimate type.
5682 Ty = OpTy;
5683 else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
5684 // Expand directly to the ultimate type if it's the right size.
5685 Ty = OpTy;
5686 // This is the type to do integer arithmetic in.
5687 Type *IntTy = SE.getEffectiveSCEVType(Ty);
5688
5689 // Build up a list of operands to add together to form the full base.
5690 SmallVector<const SCEV *, 8> Ops;
5691
5692 // Expand the BaseRegs portion.
5693 for (const SCEV *Reg : F.BaseRegs) {
5694 assert(!Reg->isZero() && "Zero allocated in a base register!");
5695
5696 // If we're expanding for a post-inc user, make the post-inc adjustment.
5697 Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE);
5698 Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
5699 }
5700
5701 // Expand the ScaledReg portion.
5702 Value *ICmpScaledV = nullptr;
5703 if (F.Scale != 0) {
5704 const SCEV *ScaledS = F.ScaledReg;
5705
5706 // If we're expanding for a post-inc user, make the post-inc adjustment.
5707 PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
5708 ScaledS = denormalizeForPostIncUse(ScaledS, Loops, SE);
5709
5710 if (LU.Kind == LSRUse::ICmpZero) {
5711 // Expand ScaleReg as if it was part of the base regs.
5712 if (F.Scale == 1)
5713 Ops.push_back(
5714 SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));
5715 else {
5716 // An interesting way of "folding" with an icmp is to use a negated
5717 // scale, which we'll implement by inserting it into the other operand
5718 // of the icmp.
5719 assert(F.Scale == -1 &&
5720 "The only scale supported by ICmpZero uses is -1!");
5721 ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
5722 }
5723 } else {
5724 // Otherwise just expand the scaled register and an explicit scale,
5725 // which is expected to be matched as part of the address.
5726
5727 // Flush the operand list to suppress SCEVExpander hoisting address modes,
5728 // unless the addressing mode will not be folded.
5729 if (!Ops.empty() && LU.Kind == LSRUse::Address &&
5730 isAMCompletelyFolded(TTI, LU, F)) {
5731 Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
5732 Ops.clear();
5733 Ops.push_back(SE.getUnknown(FullV));
5734 }
5735 ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
5736 if (F.Scale != 1)
5737 ScaledS =
5738 SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
5739 Ops.push_back(ScaledS);
5740 }
5741 }
5742
5743 // Expand the GV portion.
5744 if (F.BaseGV) {
5745 // Flush the operand list to suppress SCEVExpander hoisting.
5746 if (!Ops.empty()) {
5747 Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), IntTy);
5748 Ops.clear();
5749 Ops.push_back(SE.getUnknown(FullV));
5750 }
5751 Ops.push_back(SE.getUnknown(F.BaseGV));
5752 }
5753
5754 // Flush the operand list to suppress SCEVExpander hoisting of both folded and
5755 // unfolded offsets. LSR assumes they both live next to their uses.
5756 if (!Ops.empty()) {
5757 Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
5758 Ops.clear();
5759 Ops.push_back(SE.getUnknown(FullV));
5760 }
5761
5762 // FIXME: Are we sure we won't get a mismatch here? Is there a way to bail
5763 // out at this point, or should we generate a SCEV adding together mixed
5764 // offsets?
5765 assert(F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
5766 "Expanding mismatched offsets\n");
5767 // Expand the immediate portion.
5768 Immediate Offset = F.BaseOffset.addUnsigned(LF.Offset);
5769 if (Offset.isNonZero()) {
5770 if (LU.Kind == LSRUse::ICmpZero) {
5771 // The other interesting way of "folding" with an ICmpZero is to use a
5772 // negated immediate.
5773 if (!ICmpScaledV)
5774 ICmpScaledV =
5775 ConstantInt::get(IntTy, -(uint64_t)Offset.getFixedValue());
5776 else {
5777 Ops.push_back(SE.getUnknown(ICmpScaledV));
5778 ICmpScaledV = ConstantInt::get(IntTy, Offset.getFixedValue());
5779 }
5780 } else {
5781 // Just add the immediate values. These again are expected to be matched
5782 // as part of the address.
5783 Ops.push_back(Offset.getUnknownSCEV(SE, IntTy));
5784 }
5785 }
5786
5787 // Expand the unfolded offset portion.
5788 Immediate UnfoldedOffset = F.UnfoldedOffset;
5789 if (UnfoldedOffset.isNonZero()) {
5790 // Just add the immediate values.
5791 Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
5792 }
5793
5794 // Emit instructions summing all the operands.
5795 const SCEV *FullS = Ops.empty() ?
5796 SE.getConstant(IntTy, 0) :
5797 SE.getAddExpr(Ops);
5798 Value *FullV = Rewriter.expandCodeFor(FullS, Ty);
5799
5800 // We're done expanding now, so reset the rewriter.
5801 Rewriter.clearPostInc();
5802
5803 // An ICmpZero Formula represents an ICmp which we're handling as a
5804 // comparison against zero. Now that we've expanded an expression for that
5805 // form, update the ICmp's other operand.
5806 if (LU.Kind == LSRUse::ICmpZero) {
5807 ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
5808 if (auto *OperandIsInstr = dyn_cast<Instruction>(CI->getOperand(1)))
5809 DeadInsts.emplace_back(OperandIsInstr);
5810 assert(!F.BaseGV && "ICmp does not support folding a global value and "
5811 "a scale at the same time!");
5812 if (F.Scale == -1) {
5813 if (ICmpScaledV->getType() != OpTy) {
5814 Instruction *Cast = CastInst::Create(
5815 CastInst::getCastOpcode(ICmpScaledV, false, OpTy, false),
5816 ICmpScaledV, OpTy, "tmp", CI->getIterator());
5817 ICmpScaledV = Cast;
5818 }
5819 CI->setOperand(1, ICmpScaledV);
5820 } else {
5821 // A scale of 1 means that the scale has been expanded as part of the
5822 // base regs.
5823 assert((F.Scale == 0 || F.Scale == 1) &&
5824 "ICmp does not support folding a global value and "
5825 "a scale at the same time!");
5826 Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
5827 -(uint64_t)Offset.getFixedValue());
5828 if (C->getType() != OpTy) {
5829 C = ConstantFoldCastOperand(
5830 CastInst::getCastOpcode(C, false, OpTy, false), C, OpTy,
5831 CI->getDataLayout());
5832 assert(C && "Cast of ConstantInt should have folded");
5833 }
5834
5835 CI->setOperand(1, C);
5836 }
5837 }
5838
5839 return FullV;
5840}
5841
5842/// Helper for Rewrite. PHI nodes are special because the use of their operands
5843/// effectively happens in their predecessor blocks, so the expression may need
5844/// to be expanded in multiple places.
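/// The expansion for each predecessor block is cached, so a value feeding the
/// PHI along several edges is only materialized once per block.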
5845void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU,
5846 const LSRFixup &LF, const Formula &F,
5847 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
5848 DenseMap<BasicBlock *, Value *> Inserted;
5849
5850 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
5851 if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
5852 bool needUpdateFixups = false;
5853 BasicBlock *BB = PN->getIncomingBlock(i);
5854
5855 // If this is a critical edge, split the edge so that we do not insert
5856 // the code on all predecessor/successor paths. We do this unless this
5857 // is the canonical backedge for this loop, which complicates post-inc
5858 // users.
5859 if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
5860 !isa<IndirectBrInst>(BB->getTerminator()) &&
5861 !isa<CatchSwitchInst>(BB->getTerminator())) {
5862 BasicBlock *Parent = PN->getParent();
5863 Loop *PNLoop = LI.getLoopFor(Parent);
5864 if (!PNLoop || Parent != PNLoop->getHeader()) {
5865 // Split the critical edge.
5866 BasicBlock *NewBB = nullptr;
5867 if (!Parent->isLandingPad()) {
5868 NewBB =
5869 SplitCriticalEdge(BB, Parent,
5870 CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
5871 .setMergeIdenticalEdges()
5872 .setKeepOneInputPHIs());
5873 } else {
5874 SmallVector<BasicBlock*, 2> NewBBs;
5875 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
5876 SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DTU, &LI);
5877 NewBB = NewBBs[0];
5878 }
5879 // If NewBB==NULL, then SplitCriticalEdge refused to split because all
5880 // phi predecessors are identical. The simple thing to do is skip
5881 // splitting in this case rather than complicate the API.
5882 if (NewBB) {
5883 // If PN is outside of the loop and BB is in the loop, we want to
5884 // move the block to be immediately before the PHI block, not
5885 // immediately after BB.
5886 if (L->contains(BB) && !L->contains(PN))
5887 NewBB->moveBefore(PN->getParent());
5888
5889 // Splitting the edge can reduce the number of PHI entries we have.
5890 e = PN->getNumIncomingValues();
5891 BB = NewBB;
5892 i = PN->getBasicBlockIndex(BB);
5893
5894 needUpdateFixups = true;
5895 }
5896 }
5897 }
5898
5899 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
5900 Inserted.try_emplace(BB);
5901 if (!Pair.second)
5902 PN->setIncomingValue(i, Pair.first->second);
5903 else {
5904 Value *FullV =
5905 Expand(LU, LF, F, BB->getTerminator()->getIterator(), DeadInsts);
5906
5907 // If this is reuse-by-noop-cast, insert the noop cast.
5908 Type *OpTy = LF.OperandValToReplace->getType();
5909 if (FullV->getType() != OpTy)
5910 FullV = CastInst::Create(
5911 CastInst::getCastOpcode(FullV, false, OpTy, false), FullV,
5912 LF.OperandValToReplace->getType(), "tmp",
5913 BB->getTerminator()->getIterator());
5914
5915 // If the incoming block for this value is not in the loop, it means the
5916 // current PHI is not in a loop exit, so we must create a LCSSA PHI for
5917 // the inserted value.
5918 if (auto *I = dyn_cast<Instruction>(FullV))
5919 if (L->contains(I) && !L->contains(BB))
5920 InsertedNonLCSSAInsts.insert(I);
5921
5922 PN->setIncomingValue(i, FullV);
5923 Pair.first->second = FullV;
5924 }
5925
5926 // If LSR splits a critical edge and the phi node has other pending
5927 // fixup operands, we need to update those pending fixups. Otherwise
5928 // formulae will not be implemented completely and some instructions
5929 // will not be eliminated.
5930 if (needUpdateFixups) {
5931 for (LSRUse &LU : Uses)
5932 for (LSRFixup &Fixup : LU.Fixups)
5933 // If fixup is supposed to rewrite some operand in the phi
5934 // that was just updated, it may be already moved to
5935 // another phi node. Such fixup requires update.
5936 if (Fixup.UserInst == PN) {
5937 // Check if the operand we try to replace still exists in the
5938 // original phi.
5939 bool foundInOriginalPHI = false;
5940 for (const auto &val : PN->incoming_values())
5941 if (val == Fixup.OperandValToReplace) {
5942 foundInOriginalPHI = true;
5943 break;
5944 }
5945
5946 // If fixup operand found in original PHI - nothing to do.
5947 if (foundInOriginalPHI)
5948 continue;
5949
5950 // Otherwise it might have been moved to another PHI and require an update.
5951 // If the fixup operand is not found in any of the incoming blocks, that
5952 // means we have already rewritten it - nothing to do.
5953 for (const auto &Block : PN->blocks())
5954 for (BasicBlock::iterator I = Block->begin(); isa<PHINode>(I);
5955 ++I) {
5956 PHINode *NewPN = cast<PHINode>(I);
5957 for (const auto &val : NewPN->incoming_values())
5958 if (val == Fixup.OperandValToReplace)
5959 Fixup.UserInst = NewPN;
5960 }
5961 }
5962 }
5963 }
5964}
5965
5966/// Emit instructions for the leading candidate expression for this LSRUse (this
5967/// is called "expanding"), and update the UserInst to reference the newly
5968/// expanded value.
5969void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
5970 const Formula &F,
5971 SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
5972 // First, find an insertion point that dominates UserInst. For PHI nodes,
5973 // find the nearest block which dominates all the relevant uses.
5974 if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
5975 RewriteForPHI(PN, LU, LF, F, DeadInsts);
5976 } else {
5977 Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts);
5978
5979 // If this is reuse-by-noop-cast, insert the noop cast.
5980 Type *OpTy = LF.OperandValToReplace->getType();
5981 if (FullV->getType() != OpTy) {
5982 Instruction *Cast =
5983 CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
5984 FullV, OpTy, "tmp", LF.UserInst->getIterator());
5985 FullV = Cast;
5986 }
5987
5988 // Update the user. ICmpZero is handled specially here (for now) because
5989 // Expand may have updated one of the operands of the icmp already, and
5990 // its new value may happen to be equal to LF.OperandValToReplace, in
5991 // which case doing replaceUsesOfWith leads to replacing both operands
5992 // with the same value. TODO: Reorganize this.
5993 if (LU.Kind == LSRUse::ICmpZero)
5994 LF.UserInst->setOperand(0, FullV);
5995 else
5996 LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
5997 }
5998
5999 if (auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))
6000 DeadInsts.emplace_back(OperandIsInstr);
6001}
6002
6003// Try to hoist the IVInc to the loop header if all IVInc users are in
6004// the loop header. This helps the backend generate post-index loads/stores
6005// when the latch block is different from the loop header block.
6006static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,
6007 const LSRUse &LU, Instruction *IVIncInsertPos,
6008 Loop *L) {
6009 if (LU.Kind != LSRUse::Address)
6010 return false;
6011
6012 // For now this code does the conservative optimization and only works for
6013 // the header block. Later we can hoist the IVInc to a block that
6014 // post-dominates all users.
6015 BasicBlock *LHeader = L->getHeader();
6016 if (IVIncInsertPos->getParent() == LHeader)
6017 return false;
6018
6019 if (!Fixup.OperandValToReplace ||
6020 any_of(Fixup.OperandValToReplace->users(), [&LHeader](User *U) {
6021 Instruction *UI = cast<Instruction>(U);
6022 return UI->getParent() != LHeader;
6023 }))
6024 return false;
6025
6026 Instruction *I = Fixup.UserInst;
6027 Type *Ty = I->getType();
6028 return (isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) ||
6029 (isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty));
6030}
6031
6032/// Rewrite all the fixup locations with new values, following the chosen
6033/// solution.
6034void LSRInstance::ImplementSolution(
6035 const SmallVectorImpl<const Formula *> &Solution) {
6036 // Keep track of instructions we may have made dead, so that
6037 // we can remove them after we are done working.
6038 SmallVector<WeakTrackingVH, 16> DeadInsts;
6039
6040 // Mark phi nodes that terminate chains so the expander tries to reuse them.
6041 for (const IVChain &Chain : IVChainVec) {
6042 if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
6043 Rewriter.setChainedPhi(PN);
6044 }
6045
6046 // Expand the new value definitions and update the users.
6047 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
6048 for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
6049 Instruction *InsertPos =
6050 canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L)
6051 ? L->getHeader()->getTerminator()
6052 : IVIncInsertPos;
6053 Rewriter.setIVIncInsertPos(L, InsertPos);
6054 Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
6055 Changed = true;
6056 }
6057
6058 auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
6059 formLCSSAForInstructions(InsertedInsts, DT, LI, &SE);
6060
6061 for (const IVChain &Chain : IVChainVec) {
6062 GenerateIVChain(Chain, DeadInsts);
6063 Changed = true;
6064 }
6065
6066 for (const WeakVH &IV : Rewriter.getInsertedIVs())
6067 if (IV && dyn_cast<Instruction>(&*IV)->getParent())
6068 ScalarEvolutionIVs.push_back(IV);
6069
6070 // Clean up after ourselves. This must be done before deleting any
6071 // instructions.
6072 Rewriter.clear();
6073
6074 Changed |= RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts,
6075 &TLI, MSSAU);
6076
6077 // In our cost analysis above, we assume that each addrec consumes exactly
6078 // one register, and arrange to have increments inserted just before the
6079 // latch to maximize the chance this is true. However, if we reused
6080 // existing IVs, we now need to move the increments to match our
6081 // expectations. Otherwise, our cost modeling results in us having
6082 // chosen a non-optimal result for the actual schedule. (And yes, this
6083 // scheduling decision does impact later codegen.)
6084 for (PHINode &PN : L->getHeader()->phis()) {
6085 BinaryOperator *BO = nullptr;
6086 Value *Start = nullptr, *Step = nullptr;
6087 if (!matchSimpleRecurrence(&PN, BO, Start, Step))
6088 continue;
6089
6090 switch (BO->getOpcode()) {
6091 case Instruction::Sub:
6092 if (BO->getOperand(0) != &PN)
6093 // sub is non-commutative - match handling elsewhere in LSR
6094 continue;
6095 break;
6096 case Instruction::Add:
6097 break;
6098 default:
6099 continue;
6100 };
6101
6102 if (!isa<Constant>(Step))
6103 // If not a constant step, might increase register pressure
6104 // (We assume constants have been canonicalized to RHS)
6105 continue;
6106
6107 if (BO->getParent() == IVIncInsertPos->getParent())
6108 // Only bother moving across blocks. Isel can handle block local case.
6109 continue;
6110
6111 // Can we legally schedule inc at the desired point?
6112 if (!llvm::all_of(BO->uses(),
6113 [&](Use &U) {return DT.dominates(IVIncInsertPos, U);}))
6114 continue;
6115 BO->moveBefore(IVIncInsertPos->getIterator());
6116 Changed = true;
6117 }
6118
6119
6120}
6121
6122LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
6123 DominatorTree &DT, LoopInfo &LI,
6124 const TargetTransformInfo &TTI, AssumptionCache &AC,
6125 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
6126 : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),
6127 MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0
6128 ? PreferredAddresingMode
6129 : TTI.getPreferredAddressingMode(L, &SE)),
6130 Rewriter(SE, L->getHeader()->getDataLayout(), "lsr", false),
6131 BaselineCost(L, SE, TTI, AMK) {
6132 // If LoopSimplify form is not available, stay out of trouble.
6133 if (!L->isLoopSimplifyForm())
6134 return;
6135
6136 // If there's no interesting work to be done, bail early.
6137 if (IU.empty()) return;
6138
6139 // If there's too much analysis to be done, bail early. We won't be able to
6140 // model the problem anyway.
6141 unsigned NumUsers = 0;
6142 for (const IVStrideUse &U : IU) {
6143 if (++NumUsers > MaxIVUsers) {
6144 (void)U;
6145 LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
6146 << "\n");
6147 return;
6148 }
6149 // Bail out if we have a PHI on an EHPad that gets a value from a
6150 // CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is
6151 // no good place to stick any instructions.
6152 if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
6153 auto FirstNonPHI = PN->getParent()->getFirstNonPHIIt();
6154 if (isa<FuncletPadInst>(FirstNonPHI) ||
6155 isa<CatchSwitchInst>(FirstNonPHI))
6156 for (BasicBlock *PredBB : PN->blocks())
6157 if (isa<CatchSwitchInst>(PredBB->getFirstNonPHIIt()))
6158 return;
6159 }
6160 }
6161
6162 LLVM_DEBUG(dbgs() << "\nLSR on loop ";
6163 L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
6164 dbgs() << ":\n");
6165
6166 // Check if we expect this loop to use a hardware loop instruction, which will
6167 // be used when calculating the costs of formulas.
6168 HardwareLoopInfo HWLoopInfo(L);
6169 HardwareLoopProfitable =
6170 TTI.isHardwareLoopProfitable(L, SE, AC, &TLI, HWLoopInfo);
6171
6172 // Configure SCEVExpander already now, so the correct mode is used for
6173 // isSafeToExpand() checks.
6174#if LLVM_ENABLE_ABI_BREAKING_CHECKS
6175 Rewriter.setDebugType(DEBUG_TYPE);
6176#endif
6177 Rewriter.disableCanonicalMode();
6178 Rewriter.enableLSRMode();
6179
6180 // First, perform some low-level loop optimizations.
6181 OptimizeShadowIV();
6182 OptimizeLoopTermCond();
6183
6184 // If loop preparation eliminates all interesting IV users, bail.
6185 if (IU.empty()) return;
6186
6187 // Skip nested loops until we can model them better with formulae.
6188 if (!L->isInnermost()) {
6189 LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
6190 return;
6191 }
6192
6193 // Start collecting data and preparing for the solver.
6194 // If the number of registers is not the major cost, we cannot benefit from
6195 // the current profitable chain optimization, which is based on the number
6196 // of registers.
6197 // FIXME: add profitable chain optimization for other kinds major cost, for
6198 // example number of instructions.
6199 if (TTI.isNumRegsMajorCostOfLSR())
6200 CollectChains();
6201 CollectInterestingTypesAndFactors();
6202 CollectFixupsAndInitialFormulae();
6203 CollectLoopInvariantFixupsAndFormulae();
6204
6205 if (Uses.empty())
6206 return;
6207
6208 LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
6209 print_uses(dbgs()));
6210 LLVM_DEBUG(dbgs() << "The baseline solution requires ";
6211 BaselineCost.print(dbgs()); dbgs() << "\n");
6212
6213 // Now use the reuse data to generate a bunch of interesting ways
6214 // to formulate the values needed for the uses.
6215 GenerateAllReuseFormulae();
6216
6217 FilterOutUndesirableDedicatedRegisters();
6218 NarrowSearchSpaceUsingHeuristics();
6219
6220 SmallVector<const Formula *, 8> Solution;
6221 Solve(Solution);
6222
6223 // Release memory that is no longer needed.
6224 Factors.clear();
6225 Types.clear();
6226 RegUses.clear();
6227
6228 if (Solution.empty())
6229 return;
6230
6231#ifndef NDEBUG
6232 // Formulae should be legal.
6233 for (const LSRUse &LU : Uses) {
6234 for (const Formula &F : LU.Formulae)
6235 assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
6236 F) && "Illegal formula generated!");
6237 };
6238#endif
6239
6240 // Now that we've decided what we want, make it so.
6241 ImplementSolution(Solution);
6242}
6243
6244#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
6245void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
6246 if (Factors.empty() && Types.empty()) return;
6247
6248 OS << "LSR has identified the following interesting factors and types: ";
6249 bool First = true;
6250
6251 for (int64_t Factor : Factors) {
6252 if (!First) OS << ", ";
6253 First = false;
6254 OS << '*' << Factor;
6255 }
6256
6257 for (Type *Ty : Types) {
6258 if (!First) OS << ", ";
6259 First = false;
6260 OS << '(' << *Ty << ')';
6261 }
6262 OS << '\n';
6263}
6264
6265void LSRInstance::print_fixups(raw_ostream &OS) const {
6266 OS << "LSR is examining the following fixup sites:\n";
6267 for (const LSRUse &LU : Uses)
6268 for (const LSRFixup &LF : LU.Fixups) {
6269 dbgs() << " ";
6270 LF.print(OS);
6271 OS << '\n';
6272 }
6273}
6274
6275void LSRInstance::print_uses(raw_ostream &OS) const {
6276 OS << "LSR is examining the following uses:\n";
6277 for (const LSRUse &LU : Uses) {
6278 dbgs() << " ";
6279 LU.print(OS);
6280 OS << '\n';
6281 for (const Formula &F : LU.Formulae) {
6282 OS << " ";
6283 F.print(OS);
6284 OS << '\n';
6285 }
6286 }
6287}
6288
6289void LSRInstance::print(raw_ostream &OS) const {
6290 print_factors_and_types(OS);
6291 print_fixups(OS);
6292 print_uses(OS);
6293}
6294
6295LLVM_DUMP_METHOD void LSRInstance::dump() const {
6296 print(errs()); errs() << '\n';
6297}
6298#endif
6299
6300namespace {
6301
6302class LoopStrengthReduce : public LoopPass {
6303public:
6304 static char ID; // Pass ID, replacement for typeid
6305
6306 LoopStrengthReduce();
6307
6308private:
6309 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
6310 void getAnalysisUsage(AnalysisUsage &AU) const override;
6311};
6312
6313} // end anonymous namespace
6314
6315LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
6316 initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
6317}
6318
6319void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
6320 // We split critical edges, so we change the CFG. However, we do update
6321 // many analyses if they are around.
6322 AU.addPreservedID(LoopSimplifyID);
6323
6324 AU.addRequired<LoopInfoWrapperPass>();
6325 AU.addPreserved<LoopInfoWrapperPass>();
6326 AU.addRequiredID(LoopSimplifyID);
6327 AU.addRequired<DominatorTreeWrapperPass>();
6328 AU.addPreserved<DominatorTreeWrapperPass>();
6329 AU.addRequired<ScalarEvolutionWrapperPass>();
6330 AU.addPreserved<ScalarEvolutionWrapperPass>();
6331 AU.addRequired<AssumptionCacheTracker>();
6332 AU.addRequired<TargetLibraryInfoWrapperPass>();
6333 // Requiring LoopSimplify a second time here prevents IVUsers from running
6334 // twice, since LoopSimplify was invalidated by running ScalarEvolution.
6335 AU.addRequiredID(LoopSimplifyID);
6336 AU.addRequired<IVUsersWrapperPass>();
6337 AU.addPreserved<IVUsersWrapperPass>();
6338 AU.addRequired<TargetTransformInfoWrapperPass>();
6339 AU.addPreserved<MemorySSAWrapperPass>();
6340}
6341
6342namespace {
6343
6344/// Enables more convenient iteration over a DWARF expression vector.
6345static iterator_range<llvm::DIExpression::expr_op_iterator>
6346ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
6347 llvm::DIExpression::expr_op_iterator Begin =
6348 llvm::DIExpression::expr_op_iterator(Expr.begin());
6349 llvm::DIExpression::expr_op_iterator End =
6350 llvm::DIExpression::expr_op_iterator(Expr.end());
6351 return {Begin, End};
6352}
6353
6354struct SCEVDbgValueBuilder {
6355 SCEVDbgValueBuilder() = default;
6356 SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }
6357
6358 void clone(const SCEVDbgValueBuilder &Base) {
6359 LocationOps = Base.LocationOps;
6360 Expr = Base.Expr;
6361 }
6362
6363 void clear() {
6364 LocationOps.clear();
6365 Expr.clear();
6366 }
6367
6368 /// The DIExpression as we translate the SCEV.
6369 SmallVector<uint64_t, 2> Expr;
6370 /// The location ops of the DIExpression.
6371 SmallVector<Value *, 2> LocationOps;
6372
6373 void pushOperator(uint64_t Op) { Expr.push_back(Op); }
6374 void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }
6375
6376 /// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value
6377 /// in the set of values referenced by the expression.
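  /// For example, the first distinct value pushed becomes DW_OP_LLVM_arg 0,
  /// the next one DW_OP_LLVM_arg 1, and a value seen before reuses its index.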
6378 void pushLocation(llvm::Value *V) {
6379 Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg);
6380 auto *It = llvm::find(LocationOps, V);
6381 unsigned ArgIndex = 0;
6382 if (It != LocationOps.end()) {
6383 ArgIndex = std::distance(LocationOps.begin(), It);
6384 } else {
6385 ArgIndex = LocationOps.size();
6386 LocationOps.push_back(V);
6387 }
6388 Expr.push_back(ArgIndex);
6389 }
6390
6391 void pushValue(const SCEVUnknown *U) {
6392 llvm::Value *V = cast<SCEVUnknown>(U)->getValue();
6393 pushLocation(V);
6394 }
6395
6396 bool pushConst(const SCEVConstant *C) {
6397 if (C->getAPInt().getSignificantBits() > 64)
6398 return false;
6399 Expr.push_back(llvm::dwarf::DW_OP_consts);
6400 Expr.push_back(C->getAPInt().getSExtValue());
6401 return true;
6402 }
6403
6404 // Iterating the expression as DWARF ops is convenient when updating
6405 // DW_OP_LLVM_arg operands.
6406 iterator_range<llvm::DIExpression::expr_op_iterator> expr_ops() {
6407 return ToDwarfOpIter(Expr);
6408 }
6409
6410 /// Several SCEV types are sequences of the same arithmetic operator applied
6411 /// to constants and values that may be extended or truncated.
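  /// For example, (A + B + C) is emitted in postfix order as:
  /// A B DW_OP_plus C DW_OP_plus.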
6412 bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
6413 uint64_t DwarfOp) {
6414 assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
6415 "Expected arithmetic SCEV type");
6416 bool Success = true;
6417 unsigned EmitOperator = 0;
6418 for (const auto &Op : CommExpr->operands()) {
6419 Success &= pushSCEV(Op);
6420
6421 if (EmitOperator >= 1)
6422 pushOperator(DwarfOp);
6423 ++EmitOperator;
6424 }
6425 return Success;
6426 }
6427
6428 // TODO: Identify and omit noop casts.
6429 bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {
6430 const llvm::SCEV *Inner = C->getOperand(0);
6431 const llvm::Type *Type = C->getType();
6432 uint64_t ToWidth = Type->getIntegerBitWidth();
6433 bool Success = pushSCEV(Inner);
6434 uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth,
6435 IsSigned ? llvm::dwarf::DW_ATE_signed
6436 : llvm::dwarf::DW_ATE_unsigned};
6437 for (const auto &Op : CastOps)
6438 pushOperator(Op);
6439 return Success;
6440 }
6441
6442 // TODO: MinMax - although these haven't been encountered in the test suite.
6443 bool pushSCEV(const llvm::SCEV *S) {
6444 bool Success = true;
6445 if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
6446 Success &= pushConst(StartInt);
6447
6448 } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
6449 if (!U->getValue())
6450 return false;
6451 pushLocation(U->getValue());
6452
6453 } else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
6454 Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6455
6456 } else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
6457 Success &= pushSCEV(UDiv->getLHS());
6458 Success &= pushSCEV(UDiv->getRHS());
6459 pushOperator(llvm::dwarf::DW_OP_div);
6460
6461 } else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
6462 // Assert if a new and unknown SCEVCastExpr type is encountered.
6463 assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
6464 isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
6465 "Unexpected cast type in SCEV.");
6466 Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));
6467
6468 } else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
6469 Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6470
6471 } else if (isa<SCEVAddRecExpr>(S)) {
6472 // Nested SCEVAddRecExpr are generated by nested loops and are currently
6473 // unsupported.
6474 return false;
6475
6476 } else {
6477 return false;
6478 }
6479 return Success;
6480 }
6481
6482 /// Return true if the combination of arithmetic operator and underlying
6483 /// SCEV constant value is an identity function.
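  /// For example, (x + 0), (x - 0), (x * 1) and (x / 1) all reduce to x, so
  /// the operand and operator need not be emitted.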
6484 bool isIdentityFunction(uint64_t Op, const SCEV *S) {
6485 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
6486 if (C->getAPInt().getSignificantBits() > 64)
6487 return false;
6488 int64_t I = C->getAPInt().getSExtValue();
6489 switch (Op) {
6490 case llvm::dwarf::DW_OP_plus:
6491 case llvm::dwarf::DW_OP_minus:
6492 return I == 0;
6493 case llvm::dwarf::DW_OP_mul:
6494 case llvm::dwarf::DW_OP_div:
6495 return I == 1;
6496 }
6497 }
6498 return false;
6499 }
6500
6501 /// Convert a SCEV of a value to a DIExpression that is pushed onto the
6502 /// builder's expression stack. The stack should already contain an
6503 /// expression for the iteration count, so that it can be multiplied by
6504 /// the stride and added to the start.
6505 /// Components of the expression are omitted if they are an identity function.
6506 /// Chain (non-affine) SCEVs are not supported.
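  /// For an affine SCEV {start,+,stride}, with the iteration count already on
  /// the stack, the emitted operators compute (itercount * stride) + start.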
6507 bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
6508 assert(SAR.isAffine() && "Expected affine SCEV");
6509 const SCEV *Start = SAR.getStart();
6510 const SCEV *Stride = SAR.getStepRecurrence(SE);
6511
6512 // Skip pushing arithmetic noops.
6513 if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6514 if (!pushSCEV(Stride))
6515 return false;
6516 pushOperator(llvm::dwarf::DW_OP_mul);
6517 }
6518 if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6519 if (!pushSCEV(Start))
6520 return false;
6521 pushOperator(llvm::dwarf::DW_OP_plus);
6522 }
6523 return true;
6524 }
6525
6526 /// Create an expression that is an offset from a value (usually the IV).
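  /// For example, a location whose SCEV differs from the IV's SCEV only by a
  /// constant C is recovered as (IV + C).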
6527 void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
6528 pushLocation(OffsetValue);
6529 DIExpression::appendOffset(Expr, Offset);
6530 LLVM_DEBUG(
6531 dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
6532 << std::to_string(Offset) << "\n");
6533 }
6534
6535 /// Combine a translation of the SCEV and the IV to create an expression that
6536 /// recovers a location's value.
6537 /// returns true if an expression was created.
6538 bool createIterCountExpr(const SCEV *S,
6539 const SCEVDbgValueBuilder &IterationCount,
6540 ScalarEvolution &SE) {
6541 // SCEVs for SSA values are most frequently of the form
6542 // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
6543 // This is because %a is a PHI node that is not the IV. However, these
6544 // SCEVs have not been observed to result in debuginfo-lossy optimisations,
6545 // so it's not expected this point will be reached.
6546 if (!isa<SCEVAddRecExpr>(S))
6547 return false;
6548
6549 LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
6550 << '\n');
6551
6552 const auto *Rec = cast<SCEVAddRecExpr>(S);
6553 if (!Rec->isAffine())
6554 return false;
6555
6556 if (S->getExpressionSize() > MaxSCEVSalvageExpressionSize)
6557 return false;
6558
6559 // Initialise a new builder with the iteration count expression. In
6560 // combination with the value's SCEV this enables recovery.
6561 clone(IterationCount);
6562 if (!SCEVToValueExpr(*Rec, SE))
6563 return false;
6564
6565 return true;
6566 }
6567
6568 /// Convert a SCEV of a value to a DIExpression that is pushed onto the
6569 /// builder's expression stack. The stack should already contain an
6570 /// expression for the iteration count, so that it can be multiplied by
6571 /// the stride and added to the start.
6572 /// Components of the expression are omitted if they are an identity function.
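  /// Given the IV location already on the stack, an affine SCEV
  /// {start,+,stride} yields operators computing (IV - start) / stride,
  /// i.e. the current iteration count.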
6573 bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
6574 ScalarEvolution &SE) {
6575 assert(SAR.isAffine() && "Expected affine SCEV");
6576 const SCEV *Start = SAR.getStart();
6577 const SCEV *Stride = SAR.getStepRecurrence(SE);
6578
6579 // Skip pushing arithmetic noops.
6580 if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6581 if (!pushSCEV(Start))
6582 return false;
6583 pushOperator(llvm::dwarf::DW_OP_minus);
6584 }
6585 if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6586 if (!pushSCEV(Stride))
6587 return false;
6588 pushOperator(llvm::dwarf::DW_OP_div);
6589 }
6590 return true;
6591 }
6592
6593 // Append the current expression and locations to a location list and an
6594 // expression list. Modify the DW_OP_LLVM_arg indexes to account for
6595 // the locations already present in the destination list.
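  // For example, appending a builder whose ops are
  // [DW_OP_LLVM_arg 0, DW_OP_consts 4, DW_OP_plus] remaps arg 0 to its
  // (possibly new) index in DestLocations while the ops are copied across.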
6596 void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
6597 SmallVectorImpl<Value *> &DestLocations) {
6598 assert(!DestLocations.empty() &&
6599 "Expected the locations vector to contain the IV");
6600 // The DW_OP_LLVM_arg arguments of the expression being appended must be
6601 // modified to account for the locations already in the destination vector.
6602 // All builders contain the IV as the first location op.
6603 assert(!LocationOps.empty() &&
6604 "Expected the location ops to contain the IV.");
6605 // DestIndexMap[n] contains the index in DestLocations for the nth
6606 // location in this SCEVDbgValueBuilder.
6607 SmallVector<uint64_t, 2> DestIndexMap;
6608 for (const auto &Op : LocationOps) {
6609 auto It = find(DestLocations, Op);
6610 if (It != DestLocations.end()) {
6611 // Location already exists in DestLocations, reuse existing ArgIndex.
6612 DestIndexMap.push_back(std::distance(DestLocations.begin(), It));
6613 continue;
6614 }
6615 // Location is not in DestLocations, add it.
6616 DestIndexMap.push_back(DestLocations.size());
6617 DestLocations.push_back(Op);
6618 }
6619
6620 for (const auto &Op : expr_ops()) {
6621 if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
6622 Op.appendToVector(DestExpr);
6623 continue;
6624 }
6625
6626 DestExpr.push_back(dwarf::DW_OP_LLVM_arg);
6627 // `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV,
6628 // DestIndexMap[n] contains its new index in DestLocations.
6629 uint64_t NewIndex = DestIndexMap[Op.getArg(0)];
6630 DestExpr.push_back(NewIndex);
6631 }
6632 }
6633};
6634
6635/// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs
6636/// and DIExpression.
6637struct DVIRecoveryRec {
6638 DVIRecoveryRec(DbgVariableRecord *DVR)
6639 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {}
6640
6641 DbgVariableRecord *DbgRef;
6642 DIExpression *Expr;
6643 bool HadLocationArgList;
6644 SmallVector<WeakVH, 2> LocationOps;
6645 SmallVector<const llvm::SCEV *, 2> SCEVs;
6646 SmallVector<std::unique_ptr<SCEVDbgValueBuilder>, 2> RecoveryExprs;
6647
6648 void clear() {
6649 for (auto &RE : RecoveryExprs)
6650 RE.reset();
6651 RecoveryExprs.clear();
6652 }
6653
6654 ~DVIRecoveryRec() { clear(); }
6655};
6656} // namespace
6657
6658/// Returns the total number of DW_OP_llvm_arg operands in the expression.
6659/// This helps in determining if a DIArglist is necessary or can be omitted from
6660/// the dbg.value.
6661static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) {
6662 auto expr_ops = ToDwarfOpIter(Expr);
6663 unsigned Count = 0;
6664 for (auto Op : expr_ops)
6665 if (Op.getOp() == dwarf::DW_OP_LLVM_arg)
6666 Count++;
6667 return Count;
6668}
6669
6670/// Overwrites DVI with the location and Ops as the DIExpression. This will
6671/// create an invalid expression if Ops has any dwarf::DW_OP_llvm_arg operands,
6672/// because a DIArglist is not created for the first argument of the dbg.value.
6673template <typename T>
6674static void updateDVIWithLocation(T &DbgVal, Value *Location,
6675 SmallVectorImpl<uint64_t> &Ops) {
6676 assert(numLLVMArgOps(Ops) == 0 && "Expected expression that does not "
6677 "contain any DW_OP_llvm_arg operands.");
6678 DbgVal.setRawLocation(ValueAsMetadata::get(Location));
6679 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6681}
6682
6683/// Overwrite DVI with locations placed into a DIArglist.
6684template <typename T>
6685static void updateDVIWithLocations(T &DbgVal,
6686 SmallVectorImpl<Value *> &Locations,
6687 SmallVectorImpl<uint64_t> &Ops) {
6688 assert(numLLVMArgOps(Ops) != 0 &&
6689 "Expected expression that references DIArglist locations using "
6690 "DW_OP_llvm_arg operands.");
6691 SmallVector<ValueAsMetadata *, 3> MetadataLocs;
6692 for (Value *V : Locations)
6693 MetadataLocs.push_back(ValueAsMetadata::get(V));
6694 auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
6695 DbgVal.setRawLocation(llvm::DIArgList::get(DbgVal.getContext(), ValArrayRef));
6696 DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6697}
6698
6699/// Write the new expression and new location ops for the dbg.value. If possible
6700/// reduce the size of the dbg.value by omitting DIArglist. This
6701/// can be omitted if:
6702/// 1. There is only a single location, referenced by a single DW_OP_llvm_arg.
6703/// 2. The DW_OP_LLVM_arg is the first operand in the expression.
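/// In that case an expression [DW_OP_LLVM_arg, 0, <ops...>] collapses to
/// <ops...> and the single value is used as a direct location.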
6704static void UpdateDbgValue(DVIRecoveryRec &DVIRec,
6705 SmallVectorImpl<Value *> &NewLocationOps,
6706 SmallVectorImpl<uint64_t> &NewExpr) {
6707 DbgVariableRecord *DbgVal = DVIRec.DbgRef;
6708 unsigned NumLLVMArgs = numLLVMArgOps(NewExpr);
6709 if (NumLLVMArgs == 0) {
6710 // Location assumed to be on the stack.
6711 updateDVIWithLocation(*DbgVal, NewLocationOps[0], NewExpr);
6712 } else if (NumLLVMArgs == 1 && NewExpr[0] == dwarf::DW_OP_LLVM_arg) {
6713 // There is only a single DW_OP_llvm_arg at the start of the expression,
6714 // so it can be omitted along with DIArglist.
6715 assert(NewExpr[1] == 0 &&
6716 "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6717 llvm::SmallVector<uint64_t, 6> ShortenedOps(llvm::drop_begin(NewExpr, 2));
6718 updateDVIWithLocation(*DbgVal, NewLocationOps[0], ShortenedOps);
6719 } else {
6720 // Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary.
6721 updateDVIWithLocations(*DbgVal, NewLocationOps, NewExpr);
6722 }
6723
6724 // If the DIExpression was previously empty then add the stack terminator.
6725 // Non-empty expressions have only had elements inserted into them and so
6726 // the terminator should already be present e.g. stack_value or fragment.
6727 DIExpression *SalvageExpr = DbgVal->getExpression();
6728 if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {
6729 SalvageExpr = DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value});
6730 DbgVal->setExpression(SalvageExpr);
6731 }
6732}
6733
6734/// Cached location ops may be erased during LSR, in which case a poison is
6735/// required when restoring from the cache. The type of that location is no
6736/// longer available, so just use int8. The poison will be replaced by one or
6737/// more locations later when a SCEVDbgValueBuilder selects alternative
6738/// locations to use for the salvage.
6739static Value *getValueOrPoison(WeakVH &VH, LLVMContext &C) {
6740 return (VH) ? VH : PoisonValue::get(llvm::Type::getInt8Ty(C));
6741}
6742
6743/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
6744static void restorePreTransformState(DVIRecoveryRec &DVIRec) {
6745 DbgVariableRecord *DbgVal = DVIRec.DbgRef;
6746 LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
6747 << "scev-salvage: post-LSR: " << *DbgVal << '\n');
6748 assert(DVIRec.Expr && "Expected an expression");
6749 DbgVal->setExpression(DVIRec.Expr);
6750
6751 // Even a single location-op may be inside a DIArgList and referenced with
6752 // DW_OP_LLVM_arg, which is valid only with a DIArgList.
6753 if (!DVIRec.HadLocationArgList) {
6754 assert(DVIRec.LocationOps.size() == 1 &&
6755 "Unexpected number of location ops.");
6756 // LSR's unsuccessful salvage attempt may have added DIArgList, which in
6757 // this case was not present before, so force the location back to a
6758 // single uncontained Value.
6759 Value *CachedValue =
6760 getValueOrPoison(DVIRec.LocationOps[0], DbgVal->getContext());
6761 DbgVal->setRawLocation(ValueAsMetadata::get(CachedValue));
6762 } else {
6763 SmallVector<ValueAsMetadata *, 3> MetadataLocs;
6764 for (WeakVH VH : DVIRec.LocationOps) {
6765 Value *CachedValue = getValueOrPoison(VH, DbgVal->getContext());
6766 MetadataLocs.push_back(ValueAsMetadata::get(CachedValue));
6767 }
6768 auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
6769 DbgVal->setRawLocation(
6770 llvm::DIArgList::get(DbgVal->getContext(), ValArrayRef));
6771 }
6772 LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << '\n');
6773}
6774
6775static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
6776 llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec,
6777 const SCEV *SCEVInductionVar,
6778 SCEVDbgValueBuilder IterCountExpr) {
6779
6780 if (!DVIRec.DbgRef->isKillLocation())
6781 return false;
6782
6783 // LSR may have caused several changes to the dbg.value in the failed salvage
6784 // attempt. So restore the DIExpression, the location ops and also the
6785 // location ops format, which is always DIArglist for multiple ops, but only
6786 // sometimes for a single op.
6787 restorePreTransformState(DVIRec);
6788
6789 // LocationOpIndexMap[i] will store the post-LSR location index of
6790 // the non-optimised out location at pre-LSR index i.
6791 SmallVector<int64_t, 2> LocationOpIndexMap;
6792 LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);
6793 SmallVector<Value *, 2> NewLocationOps;
6794 NewLocationOps.push_back(LSRInductionVar);
6795
6796 for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
6797 WeakVH VH = DVIRec.LocationOps[i];
6798 // Place the locations not optimised out in the list first, avoiding
6799 // inserts later. The map is used to update the DIExpression's
6800 // DW_OP_LLVM_arg arguments as the expression is updated.
6801 if (VH && !isa<UndefValue>(VH)) {
6802 NewLocationOps.push_back(VH);
6803 LocationOpIndexMap[i] = NewLocationOps.size() - 1;
6804 LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i
6805 << " now at index " << LocationOpIndexMap[i] << "\n");
6806 continue;
6807 }
6808
6809 // It's possible that a value referred to in the SCEV may have been
6810 // optimised out by LSR.
6811 if (SE.containsErasedValue(DVIRec.SCEVs[i]) ||
6812 SE.containsUndefs(DVIRec.SCEVs[i])) {
6813 LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
6814 << " refers to a location that is now undef or erased. "
6815 "Salvage abandoned.\n");
6816 return false;
6817 }
6818
6819 LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
6820 << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");
6821
6822 DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
6823 SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
6824
6825 // Create an offset-based salvage expression if possible, as it requires
6826 // fewer DWARF ops than an iteration count-based expression.
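  // For example, if the location's SCEV and the IV's SCEV differ only by a
  // constant 4, the location is salvaged simply as (IV + 4).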
6827 if (std::optional<APInt> Offset =
6828 SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) {
6829 if (Offset->getSignificantBits() <= 64)
6830 SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar);
6831 else
6832 return false;
6833 } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
6834 SE))
6835 return false;
6836 }
6837
6838 // Merge the DbgValueBuilder generated expressions and the original
6839 // DIExpression, place the result into a new vector.
6840 SmallVector<uint64_t, 64> NewExpr;
6841 if (DVIRec.Expr->getNumElements() == 0) {
6842 assert(DVIRec.RecoveryExprs.size() == 1 &&
6843 "Expected only a single recovery expression for an empty "
6844 "DIExpression.");
6845 assert(DVIRec.RecoveryExprs[0] &&
6846 "Expected a SCEVDbgSalvageBuilder for location 0");
6847 SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();
6848 B->appendToVectors(NewExpr, NewLocationOps);
6849 }
6850 for (const auto &Op : DVIRec.Expr->expr_ops()) {
6851 // Most Ops needn't be updated.
6852 if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
6853 Op.appendToVector(NewExpr);
6854 continue;
6855 }
6856
6857 uint64_t LocationArgIndex = Op.getArg(0);
6858 SCEVDbgValueBuilder *DbgBuilder =
6859 DVIRec.RecoveryExprs[LocationArgIndex].get();
6860 // The location doesn't have a SCEVDbgValueBuilder, so LSR did not
6861 // optimise it away. So just translate the argument to the updated
6862 // location index.
6863 if (!DbgBuilder) {
6864 NewExpr.push_back(dwarf::DW_OP_LLVM_arg);
6865 assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&
6866 "Expected a positive index for the location-op position.");
6867 NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);
6868 continue;
6869 }
6870 // The location has a recovery expression.
6871 DbgBuilder->appendToVectors(NewExpr, NewLocationOps);
6872 }
6873
6874 UpdateDbgValue(DVIRec, NewLocationOps, NewExpr);
6875 LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DbgRef << "\n");
6876 return true;
6877}
6878
6879/// Obtain an expression for the iteration count, then attempt to salvage the
6880/// dbg.value intrinsics.
6881static void DbgRewriteSalvageableDVIs(
6882 llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar,
6883 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
6884 if (DVIToUpdate.empty())
6885 return;
6886
6887 const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);
6888 assert(SCEVInductionVar &&
6889 "Anticipated a SCEV for the post-LSR induction variable");
6890
6891 if (const SCEVAddRecExpr *IVAddRec =
6892 dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
6893 if (!IVAddRec->isAffine())
6894 return;
6895
6896 // Prevent translation using excessive resources.
6897 if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize)
6898 return;
6899
6900 // The iteration count is required to recover location values.
6901 SCEVDbgValueBuilder IterCountExpr;
6902 IterCountExpr.pushLocation(LSRInductionVar);
6903 if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
6904 return;
6905
6906 LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
6907 << '\n');
6908
6909 for (auto &DVIRec : DVIToUpdate) {
6910 SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
6911 IterCountExpr);
6912 }
6913 }
6914}
6915
6916/// Identify and cache salvageable DVI locations and expressions along with the
6917/// corresponding SCEV(s). Also ensure that the DVI is not deleted between
6918/// caching and salvaging.
6919static void DbgGatherSalvagableDVI(
6920 Loop *L, ScalarEvolution &SE,
6921 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs) {
6922 for (const auto &B : L->getBlocks()) {
6923 for (auto &I : *B) {
6924 for (DbgVariableRecord &DbgVal : filterDbgVars(I.getDbgRecordRange())) {
6925 if (!DbgVal.isDbgValue() && !DbgVal.isDbgAssign())
6926 continue;
6927
6928 // Ensure that if any location op is undef the dbg.value is not
6929 // cached.
6930 if (DbgVal.isKillLocation())
6931 continue;
6932
6933 // Check that the location op SCEVs are suitable for translation to
6934 // DIExpression.
6935 const auto &HasTranslatableLocationOps =
6936 [&](const DbgVariableRecord &DbgValToTranslate) -> bool {
6937 for (const auto LocOp : DbgValToTranslate.location_ops()) {
6938 if (!LocOp)
6939 return false;
6940
6941 if (!SE.isSCEVable(LocOp->getType()))
6942 return false;
6943
6944 const SCEV *S = SE.getSCEV(LocOp);
6945 if (SE.containsUndefs(S))
6946 return false;
6947 }
6948 return true;
6949 };
6950
6951 if (!HasTranslatableLocationOps(DbgVal))
6952 continue;
6953
6954 std::unique_ptr<DVIRecoveryRec> NewRec =
6955 std::make_unique<DVIRecoveryRec>(&DbgVal);
6956 // Each location Op may need a SCEVDbgValueBuilder in order to recover
6957 // it. Pre-allocating a vector will enable quick lookups of the builder
6958 // later during the salvage.
6959 NewRec->RecoveryExprs.resize(DbgVal.getNumVariableLocationOps());
6960 for (const auto LocOp : DbgVal.location_ops()) {
6961 NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
6962 NewRec->LocationOps.push_back(LocOp);
6963 NewRec->HadLocationArgList = DbgVal.hasArgList();
6964 }
6965 SalvageableDVISCEVs.push_back(std::move(NewRec));
6966 }
6967 }
6968 }
6969}
6970
6971/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback
6972/// any PHI from the loop header is usable, but may have less chance of
6973/// surviving subsequent transforms.
6974static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
6975 const LSRInstance &LSR) {
6976
6977 auto IsSuitableIV = [&](PHINode *P) {
6978 if (!SE.isSCEVable(P->getType()))
6979 return false;
6980 if (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(P)))
6981 return Rec->isAffine() && !SE.containsUndefs(SE.getSCEV(P));
6982 return false;
6983 };
6984
6985 // For now, just pick the first IV that was generated and inserted by
6986 // ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away
6987 // by subsequent transforms.
6988 for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
6989 if (!IV)
6990 continue;
6991
6992 // There should only be PHI node IVs.
6993 PHINode *P = cast<PHINode>(&*IV);
6994
6995 if (IsSuitableIV(P))
6996 return P;
6997 }
6998
6999 for (PHINode &P : L.getHeader()->phis()) {
7000 if (IsSuitableIV(&P))
7001 return &P;
7002 }
7003 return nullptr;
7004}
7005
7006static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
7007 DominatorTree &DT, LoopInfo &LI,
7008 const TargetTransformInfo &TTI,
7009 AssumptionCache &AC, TargetLibraryInfo &TLI,
7010 MemorySSA *MSSA) {
7011
7012 // Debug preservation - before we start removing anything identify which DVI
7013 // meet the salvageable criteria and store their DIExpression and SCEVs.
7014 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> SalvageableDVIRecords;
7015 DbgGatherSalvagableDVI(L, SE, SalvageableDVIRecords);
7016
7017 bool Changed = false;
7018 std::unique_ptr<MemorySSAUpdater> MSSAU;
7019 if (MSSA)
7020 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
7021
7022 // Run the main LSR transformation.
7023 const LSRInstance &Reducer =
7024 LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
7025 Changed |= Reducer.getChanged();
7026
7027 // Remove any extra phis created by processing inner loops.
7028 Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
7029 if (EnablePhiElim && L->isLoopSimplifyForm()) {
7030 SmallVector<WeakTrackingVH, 16> DeadInsts;
7031 const DataLayout &DL = L->getHeader()->getDataLayout();
7032 SCEVExpander Rewriter(SE, DL, "lsr", false);
7033#if LLVM_ENABLE_ABI_BREAKING_CHECKS
7034 Rewriter.setDebugType(DEBUG_TYPE);
7035#endif
7036 unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
7037 Rewriter.clear();
7038 if (numFolded) {
7039 Changed = true;
7040 RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
7041 MSSAU.get());
7042 DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
7043 }
7044 }
7045 // LSR may at times remove all uses of an induction variable from a loop.
7046 // The only remaining use is the PHI in the exit block.
7047 // When this is the case, if the exit value of the IV can be calculated using
7048 // SCEV, we can replace the exit block PHI with the final value of the IV and
7049 // skip the updates in each loop iteration.
7050 if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
7051 SmallVector<WeakTrackingVH, 16> DeadInsts;
7052 const DataLayout &DL = L->getHeader()->getDataLayout();
7053 SCEVExpander Rewriter(SE, DL, "lsr", true);
7054 int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter, &DT,
7055 UnusedIndVarInLoop, DeadInsts);
7056 Rewriter.clear();
7057 if (Rewrites) {
7058 Changed = true;
7059 RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
7060 MSSAU.get());
7061 DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
7062 }
7063 }
7064
7065 if (SalvageableDVIRecords.empty())
7066 return Changed;
7067
7068 // Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
7069 // expressions composed using the derived iteration count.
7070 // TODO: Allow for multiple IV references for nested AddRecSCEVs
7071 for (const auto &L : LI) {
7072 if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer))
7073 DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVIRecords);
7074 else {
7075 LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
7076 "could not be identified.\n");
7077 }
7078 }
7079
7080 for (auto &Rec : SalvageableDVIRecords)
7081 Rec->clear();
7082 SalvageableDVIRecords.clear();
7083 return Changed;
7084}
7085
7086bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
7087 if (skipLoop(L))
7088 return false;
7089
7090 auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
7091 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
7092 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
7093 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
7094 const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
7095 *L->getHeader()->getParent());
7096 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
7097 *L->getHeader()->getParent());
7098 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
7099 *L->getHeader()->getParent());
7100 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
7101 MemorySSA *MSSA = nullptr;
7102 if (MSSAAnalysis)
7103 MSSA = &MSSAAnalysis->getMSSA();
7104 return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA);
7105}
7106
7107PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
7108 LoopStandardAnalysisResults &AR,
7109 LPMUpdater &) {
7110 if (!ReduceLoopStrength(&L, AM.getResult<IVUsersAnalysis>(L, AR), AR.SE,
7111 AR.DT, AR.LI, AR.TTI, AR.AC, AR.TLI, AR.MSSA))
7112 return PreservedAnalyses::all();
7113
7114 auto PA = getLoopPassPreservedAnalyses();
7115 if (AR.MSSA)
7116 PA.preserve<MemorySSAAnalysis>();
7117 return PA;
7118}
7119
7120char LoopStrengthReduce::ID = 0;
7121
7122INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
7123 "Loop Strength Reduction", false, false)
7124INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
7125INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
7126INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
7127INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
7128INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
7129INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
7130INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
7131 "Loop Strength Reduction", false, false)
7132
7133Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); }
#define Success
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:687
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:638
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static bool isCanonical(const MDString *S)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file contains constants used for implementing Dwarf debug support.
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1328
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Hardware Loops
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv Induction Variable Users
Definition: IVUsers.cpp:48
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:546
This header provides classes for managing per-loop analyses.
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static GlobalValue * ExtractSymbol(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode")))
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
loop reduce
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void UpdateDbgValue(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, Loop *L)
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< const SCEV * > &Good, SmallVectorImpl< const SCEV * > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
#define DEBUG_TYPE
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
Register Reg
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
uint64_t IntrinsicInst * II
#define P(N)
PowerPC TLS Dynamic Call Fixup
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:39
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
SI optimize exec mask operations pre RA
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define LLVM_DEBUG(...)
Definition: Debug.h:119
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
Virtual Register Rewriter
Definition: VirtRegMap.cpp:269
Value * RHS
Value * LHS
BinaryOperator * Mul
static const uint32_t IV[8]
Definition: blake3_impl.h:83
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition: APInt.cpp:1644
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1531
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition: APInt.cpp:1736
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1562
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
Represent the analysis usage information of a pass.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:284
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: Any.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:506
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:709
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:528
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.h:386
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:661
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
BinaryOps getOpcode() const
Definition: InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Conditional or Unconditional Branch instruction.
bool isUnconditional() const
Value * getCondition() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
@ ICMP_EQ
equal
Definition: InstrTypes.h:699
@ ICMP_NE
not equal
Definition: InstrTypes.h:700
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:791
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
static LLVM_ABI bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
Definition: Constants.cpp:1602
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:131
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:169
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:163
This is an important base class in LLVM.
Definition: Constant.h:43
static LLVM_ABI DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
An iterator for expression operands.
DWARF expression.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
static LLVM_ABI void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
LLVM_ABI bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
LLVM_ABI LLVMContext & getContext()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
void setRawLocation(Metadata *NewLocation)
Use of this should generally be avoided; instead, replaceVariableLocationOp and addVariableLocationOp...
void setExpression(DIExpression *NewExpr)
DIExpression * getExpression() const
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
Implements a dense probed hash-table based set.
Definition: DenseSet.h:263
NodeT * getBlock() const
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:322
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
Definition: Dominators.cpp:357
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:135
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
IVStrideUse - Keep track of one use of a strided induction variable.
Definition: IVUsers.h:35
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Definition: IVUsers.cpp:365
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
Definition: IVUsers.h:54
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Definition: IVUsers.h:48
Analysis pass that exposes the IVUsers for a loop.
Definition: IVUsers.h:184
ilist< IVStrideUse >::const_iterator const_iterator
Definition: IVUsers.h:142
bool empty() const
Definition: IVUsers.h:147
LLVM_ABI void print(raw_ostream &OS) const
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:513
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:879
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:312
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:510
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:86
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:319
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:49
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
Definition: Instructions.h:180
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:597
virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:936
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:993
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:702
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:99
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:112
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1885
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This is the base class for unary cast operator classes.
This node is the base class for n'ary commutative operators.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information from scalar evolution analysis to rewrite expressions in canonical form.
This is the base class for unary integral cast operator classes.
This node represents multiplication of some number of SCEVs.
This node is a base class providing common functionality for n'ary operators.
ArrayRef< const SCEV * > operands() const
This class represents a signed maximum selection.
This class represents a binary unsigned division operation.
This class represents an unsigned maximum selection.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
LLVM_ABI ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
unsigned short getExpressionSize() const
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
SCEVTypes getSCEVType() const
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents a cast from signed integer to floating point.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
LLVM_ABI bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getVScale(Type *Ty)
LLVM_ABI bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getUnknown(Value *V)
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
LLVM_ABI bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if the elements that make up the given SCEV properly dominate the specified basic block.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimized out and is now a nullptr.
LLVMContext & getContext() const
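A sketch of how several of the ScalarEvolution entry points above combine when forming a candidate recurrence; SE, L, Ty and StepV are assumed to be in scope, and the no-wrap flags are purely illustrative.
  // Form the affine recurrence {0,+,Step}<L> for a candidate IV, if the step
  // is analyzable and loop-invariant.
  if (SE.isSCEVable(Ty) && SE.isLoopInvariant(SE.getSCEV(StepV), L)) {
    const SCEV *Start = SE.getZero(Ty);
    const SCEV *Step = SE.getNoopOrSignExtend(SE.getSCEV(StepV), Ty);
    const SCEV *AR = SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
    dbgs() << "candidate recurrence: " << *AR << "\n";
  }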
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
Definition: SetVector.h:59
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:104
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:119
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:109
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
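A sketch of the usual iteration idiom for the bit-array interface above; NumFormulae and the chosen indices are illustrative.
  SmallBitVector Chosen(NumFormulae);  // all bits start cleared
  Chosen.set(3);
  Chosen.set(7);
  for (int I = Chosen.find_first(); I != -1; I = Chosen.find_next(I))
    dbgs() << "formula " << I << " selected\n";
  // Equivalent, using the range interface:
  for (unsigned I : Chosen.set_bits())
    dbgs() << "formula " << I << " selected\n";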
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
void insert_range(Range &&R)
Definition: SmallPtrSet.h:490
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
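A sketch of the membership-filter idiom behind the SmallPtrSet interface above; Visited, Worklist and I are assumed names.
  SmallPtrSet<Instruction *, 8> Visited;
  SmallVector<Instruction *, 8> Worklist;
  if (Visited.insert(I).second)   // .second is true only on first insertion
    Worklist.push_back(I);
  if (Visited.count(I))           // membership query: returns 1 or 0
    dbgs() << "instruction already visited\n";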
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
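A sketch showing why SetVector (and its small variant above) is preferred for register lists: deterministic, insertion-ordered iteration with duplicate rejection. Regs, RegA and RegB are assumed names.
  SmallSetVector<const SCEV *, 4> Regs;
  Regs.insert(RegA);                 // true: newly inserted
  Regs.insert(RegB);
  Regs.insert(RegA);                 // false: duplicate, order unchanged
  for (const SCEV *R : Regs)         // visits RegA then RegB
    dbgs() << *R << "\n";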
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
void clear()
Definition: SmallSet.h:209
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:705
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void reserve(size_type N)
Definition: SmallVector.h:664
iterator erase(const_iterator CI)
Definition: SmallVector.h:738
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:579
typename SuperClass::iterator iterator
Definition: SmallVector.h:578
void resize(size_type N)
Definition: SmallVector.h:639
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
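A sketch of the SmallVector-based worklist pattern used throughout this pass; S is an assumed root const SCEV *.
  SmallVector<const SCEV *, 8> Worklist;
  Worklist.reserve(32);
  Worklist.push_back(S);
  while (!Worklist.empty()) {
    const SCEV *Cur = Worklist.pop_back_val();  // take the last element
    append_range(Worklist, Cur->operands());    // enqueue its operands
    // (a production traversal would also track visited nodes)
  }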
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition: TypeSize.h:45
An instruction for storing to memory.
Definition: Instructions.h:296
Provides information about what library functions are available for the current target.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
LLVM_ABI bool shouldDropLSRSolutionIfLessProfitable() const
Return true if LSR should drop a found solution if it's calculated to be less profitable than the bas...
LLVM_ABI bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const
Return true if LSR cost of C1 is lower than C2.
LLVM_ABI bool isProfitableLSRChainElement(Instruction *I) const
LLVM_ABI bool LSRWithInstrQueries() const
Return true if the loop strength reduce pass should make Instruction* based TTI queries to isLegalAdd...
LLVM_ABI bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
LLVM_ABI unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
LLVM_ABI bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr, int64_t ScalableOffset=0) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
LLVM_ABI bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
LLVM_ABI bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
LLVM_ABI bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
LLVM_ABI bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
LLVM_ABI bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count, for example hardware loop saves a compar...
LLVM_ABI bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
LLVM_ABI unsigned getNumberOfRegisters(unsigned ClassID) const
LLVM_ABI bool isLegalAddScalableImmediate(int64_t Imm) const
Return true if adding the specified scalable immediate is legal, that is the target has add instructi...
LLVM_ABI bool isNumRegsMajorCostOfLSR() const
Return true if LSR major cost is number of registers.
@ MIM_PostInc
Post-incrementing.
LLVM_ABI bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
LLVM_ABI InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
LLVM_ABI bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
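A sketch of the legality queries LSR issues when costing a formula against the hooks above; TTI, AccessTy and AddrSpace are assumed, and the base/scale values are purely illustrative.
  // Can the target fold "base + 2*index + 4" into a single addressing mode?
  bool Folds = TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/nullptr,
                                         /*BaseOffset=*/4, /*HasBaseReg=*/true,
                                         /*Scale=*/2, AddrSpace);
  if (!Folds && TTI.isLegalAddImmediate(4))
    dbgs() << "offset must be materialized by a separate add\n";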
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:267
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
LLVM_ABI unsigned getIntegerBitWidth() const
This class represents a cast unsigned integer to floating point.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
op_range operands()
Definition: User.h:292
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
void setOperand(unsigned i, Value *Val)
Definition: User.h:237
Value * getOperand(unsigned i) const
Definition: User.h:232
op_iterator op_end()
Definition: User.h:286
static LLVM_ABI ValueAsMetadata * get(Value *V)
Definition: Metadata.cpp:502
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:546
iterator_range< user_iterator > users()
Definition: Value.h:426
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1098
iterator_range< use_iterator > uses()
Definition: Value.h:380
A Value handle that may be null.
Definition: ValueHandle.h:145
int getNumOccurrences() const
Definition: CommandLine.h:400
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:194
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:174
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Key
PAL metadata keys.
@ Entry
Definition: COFF.h:862
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
class_match< const SCEVVScale > m_SCEVVScale()
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
class_match< const SCEVConstant > m_SCEVConstant()
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
SCEVBinaryExpr_match< SCEVMulExpr, Op0_t, Op1_t > m_scev_Mul(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
class_match< const Loop > m_Loop()
cst_pred_ty< is_specific_cst > m_scev_SpecificInt(uint64_t V)
Match an SCEV constant with a plain unsigned integer.
class_match< const SCEV > m_SCEV()
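A sketch of the SCEV pattern matchers above (a using-directive for llvm::SCEVPatternMatch is assumed); S is an arbitrary const SCEV * and the bound constants are only read back for illustration.
  const APInt *StartC = nullptr, *StepC = nullptr;
  if (match(S, m_scev_AffineAddRec(m_scev_APInt(StartC), m_scev_APInt(StepC))))
    dbgs() << "affine IV: start " << *StartC << ", step " << *StepC << "\n";
  if (match(S, m_scev_Mul(m_SCEVConstant(), m_SCEVVScale())))
    dbgs() << "constant multiple of vscale\n";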
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:712
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
Definition: Dwarf.h:148
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Definition: Dwarf.h:144
constexpr double e
Definition: MathExtras.h:47
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition: PtrState.h:41
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition: SFrame.h:77
LLVM_ABI const_iterator begin(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
LLVM_ABI const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
Definition: Path.cpp:235
unsigned KindType
For isa, dyn_cast, etc. operations on TelemetryInfo.
Definition: Telemetry.h:85
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition: DWP.cpp:477
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1770
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1723
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2113
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2155
LLVM_ABI char & LoopSimplifyID
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count the number of 0s from the least significant bit up to the most significant bit, stopping at the first 1.
Definition: bit.h:157
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
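A sketch of the recurrence matcher above; PN is an assumed PHINode * in a loop header.
  BinaryOperator *BO = nullptr;
  Value *Start = nullptr, *Step = nullptr;
  if (matchSimpleRecurrence(PN, BO, Start, Step))
    dbgs() << "recurrence uses " << BO->getOpcodeName()
           << " with step " << *Step << "\n";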
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:336
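A sketch relating the two bit-math helpers above for a power-of-two stride; Stride is an assumed uint32_t known to be a non-zero power of two.
  unsigned ShiftAmt = Log2_32(Stride);          // floor(log2(Stride))
  unsigned TrailingZeros = countr_zero(Stride); // trailing zero count
  assert(ShiftAmt == TrailingZeros &&
         "for a power of two, both recover the same shift amount");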
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI void initializeLoopStrengthReducePass(PassRegistry &)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
LLVM_ABI const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1758
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
LLVM_ABI void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
LLVM_ABI const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1973
LLVM_ABI Pass * createLoopStrengthReducePass()
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
Definition: Local.cpp:548
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition: LCSSA.cpp:308
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1777
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
Definition: LoopUtils.cpp:1574
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
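A sketch combining several of the range helpers above; BaseRegs, Regs, SE, L and S are assumed names.
  bool AllInvariant = all_of(BaseRegs, [&](const SCEV *R) {
    return SE.isLoopInvariant(R, L);
  });
  if (AllInvariant && none_of(Regs, [&](const SCEV *R) { return R == S; }))
    Regs.push_back(S);
  // is_contained(Regs, S) expresses the same membership test more directly.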
@ UnusedIndVarInLoop
Definition: LoopUtils.h:495
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
#define N
Option class for critical edge splitting.
Attributes of a target dependent hardware loop.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.